diff --git a/.wsignore b/.wsignore index 279f23c343..478724ee95 100644 --- a/.wsignore +++ b/.wsignore @@ -27,8 +27,3 @@ python/databricks/bundles/*/_models/*.py # Developed elsewhere: internal/genkit/tagging.py - -# Docsgen: -bundle/docsgen/output/resources.md -bundle/docsgen/output/reference.md -bundle/docsgen/testdata/anchors.md diff --git a/Makefile b/Makefile index 7599c2c26e..92b05e6592 100644 --- a/Makefile +++ b/Makefile @@ -130,14 +130,11 @@ snapshot: snapshot-release: goreleaser release --clean --skip docker --snapshot -schema: - go run ./bundle/internal/schema ./bundle/internal/schema ./bundle/schema/jsonschema.json +schema: .codegen/openapi.json + DATABRICKS_OPENAPI_SPEC=.codegen/openapi.json go run ./bundle/internal/schema ./bundle/internal/schema ./bundle/schema/jsonschema.json -schema-for-docs: - go run ./bundle/internal/schema ./bundle/internal/schema ./bundle/schema/jsonschema_for_docs.json --docs - -docs: - go run ./bundle/docsgen ./bundle/internal/schema ./bundle/docsgen +schema-for-docs: .codegen/openapi.json + DATABRICKS_OPENAPI_SPEC=.codegen/openapi.json go run ./bundle/internal/schema ./bundle/internal/schema ./bundle/schema/jsonschema_for_docs.json --docs INTEGRATION = go run -modfile=tools/go.mod ./tools/testrunner/main.go ${GO_TOOL} gotestsum --format github-actions --rerun-fails --jsonfile output.json --packages "./acceptance ./integration/..." 
-- -parallel 4 -timeout=2h @@ -202,7 +199,7 @@ bundle/direct/dresources/apitypes.generated.yml: ./bundle/direct/tools/generate_ bundle/direct/dresources/resources.generated.yml: ./bundle/direct/tools/generate_resources.py .codegen/openapi.json bundle/direct/dresources/apitypes.generated.yml bundle/direct/dresources/apitypes.yml acceptance/bundle/refschema/out.fields.txt python3 $^ > $@ -.PHONY: lint lintfull tidy lintcheck fmt fmtfull test test-unit test-acc test-slow test-slow-unit test-slow-acc cover showcover build snapshot snapshot-release schema schema-for-docs integration integration-short acc-cover acc-showcover docs ws wsfix links checks test-update test-update-templates generate-out-test-toml test-update-aws test-update-all generate-validation +.PHONY: lint lintfull tidy lintcheck fmt fmtfull test test-unit test-acc test-slow test-slow-unit test-slow-acc cover showcover build snapshot snapshot-release schema schema-for-docs integration integration-short acc-cover acc-showcover ws wsfix links checks test-update test-update-templates generate-out-test-toml test-update-aws test-update-all generate-validation test-exp-aitools: make test TEST_PACKAGES="./experimental/aitools/..." ACCEPTANCE_TEST_FILTER="TestAccept/apps" diff --git a/bundle/docsgen/README.md b/bundle/docsgen/README.md deleted file mode 100644 index 220a14c1c9..0000000000 --- a/bundle/docsgen/README.md +++ /dev/null @@ -1,79 +0,0 @@ -## docs-autogen - -1. Install [Golang](https://go.dev/doc/install) -2. Run `make docs` from the repo -3. See generated documents in `./bundle/docsgen/output` directory -4. To change descriptions update content in `./bundle/internal/schema/annotations.yml` or `./bundle/internal/schema/annotations_openapi_overrides.yml` and re-run `make docs` - -For simpler usage run it together with copy command to move resulting files to local `docs` repo. Note that it will overwrite any local changes in affected files. 
Example: - -``` -make docs && cp bundle/docgen/output/*.md ../docs/source/dev-tools/bundles -``` - -To change intro sections for files update them in `templates/` directory - -### Annotation file structure - -```yaml -"": - "": - description: Description of the property, only plain text is supported - markdown_description: Description with markdown support, if defined it will override the value in docs and in JSON-schema - markdown_examples: Custom block for any example, in free form, Markdown is supported - title: JSON-schema title, not used in docs - default: Default value of the property, not used in docs - enum: Possible values of enum-type, not used in docs -``` - -Descriptions with `PLACEHOLDER` value are not displayed in docs and JSON-schema - -All relative links like `[_](/dev-tools/bundles/settings.md#cluster_id)` are kept as is in docs but converted to absolute links in JSON schema - -To change description for type itself (not its fields) use `"_"`: - -```yaml -github.com/databricks/cli/bundle/config/resources.Cluster: - "_": - "markdown_description": |- - The cluster resource defines an [all-purpose cluster](/api/workspace/clusters/create). -``` - -### Example annotation - -```yaml -github.com/databricks/cli/bundle/config.Bundle: - "cluster_id": - "description": |- - The ID of a cluster to use to run the bundle. - "markdown_description": |- - The ID of a cluster to use to run the bundle. See [_](/dev-tools/bundles/settings.md#cluster_id). - "compute_id": - "description": |- - PLACEHOLDER - "databricks_cli_version": - "description": |- - The Databricks CLI version to use for the bundle. - "markdown_description": |- - The Databricks CLI version to use for the bundle. See [_](/dev-tools/bundles/settings.md#databricks_cli_version). - "deployment": - "description": |- - The definition of the bundle deployment - "markdown_description": |- - The definition of the bundle deployment. 
For supported attributes, see [_](#deployment) and [_](/dev-tools/bundles/deployment-modes.md). - "git": - "description": |- - The Git version control details that are associated with your bundle. - "markdown_description": |- - The Git version control details that are associated with your bundle. For supported attributes, see [_](#git) and [_](/dev-tools/bundles/settings.md#git). - "name": - "description": |- - The name of the bundle. - "uuid": - "description": |- - PLACEHOLDER -``` - -### TODO - -Add file watcher to track changes in the annotation files and re-run `make docs` script automtically diff --git a/bundle/docsgen/main.go b/bundle/docsgen/main.go deleted file mode 100644 index 31dae7533f..0000000000 --- a/bundle/docsgen/main.go +++ /dev/null @@ -1,152 +0,0 @@ -package main - -import ( - "fmt" - "log" - "os" - "path" - "reflect" - "strings" - "time" - - "github.com/databricks/cli/bundle/config" - "github.com/databricks/cli/bundle/internal/annotation" - "github.com/databricks/cli/libs/jsonschema" -) - -const ( - rootFileName = "reference.md" - resourcesFileName = "resources.md" -) - -func main() { - if len(os.Args) != 3 { - fmt.Println("Usage: go run main.go ") - os.Exit(1) - } - - annotationDir := os.Args[1] - docsDir := os.Args[2] - outputDir := path.Join(docsDir, "output") - templatesDir := path.Join(docsDir, "templates") - - if _, err := os.Stat(outputDir); os.IsNotExist(err) { - if err := os.MkdirAll(outputDir, 0o755); err != nil { - log.Fatal(err) - } - } - - rootHeader, err := os.ReadFile(path.Join(templatesDir, rootFileName)) - if err != nil { - log.Fatal(err) - } - err = generateDocs( - []string{path.Join(annotationDir, "annotations.yml")}, - path.Join(outputDir, rootFileName), - reflect.TypeOf(config.Root{}), - fillTemplateVariables(string(rootHeader)), - ) - if err != nil { - log.Fatal(err) - } - resourcesHeader, err := os.ReadFile(path.Join(templatesDir, resourcesFileName)) - if err != nil { - log.Fatal(err) - } - err = generateDocs( - 
[]string{path.Join(annotationDir, "annotations_openapi.yml"), path.Join(annotationDir, "annotations_openapi_overrides.yml"), path.Join(annotationDir, "annotations.yml")}, - path.Join(outputDir, resourcesFileName), - reflect.TypeOf(config.Resources{}), - fillTemplateVariables(string(resourcesHeader)), - ) - if err != nil { - log.Fatal(err) - } -} - -func generateDocs(inputPaths []string, outputPath string, rootType reflect.Type, header string) error { - annotations, err := annotation.LoadAndMerge(inputPaths) - if err != nil { - log.Fatal(err) - } - - // schemas is used to resolve references to schemas - schemas := map[string]*jsonschema.Schema{} - // ownFields is used to track fields that are defined in the annotation file and should be included in the docs page - ownFields := map[string]bool{} - - s, err := jsonschema.FromType(rootType, []func(reflect.Type, jsonschema.Schema) jsonschema.Schema{ - func(typ reflect.Type, s jsonschema.Schema) jsonschema.Schema { - _, isOwnField := annotations[jsonschema.TypePath(typ)] - if isOwnField { - ownFields[jsonschema.TypePath(typ)] = true - } - - refPath := getPath(typ) - shouldHandle := strings.HasPrefix(refPath, "github.com") - if !shouldHandle { - schemas[jsonschema.TypePath(typ)] = &s - return s - } - - a := annotations[refPath] - if a == nil { - a = map[string]annotation.Descriptor{} - } - - rootTypeAnnotation, ok := a["_"] - if ok { - assignAnnotation(&s, rootTypeAnnotation) - } - - for k, v := range s.Properties { - assignAnnotation(v, a[k]) - } - - schemas[jsonschema.TypePath(typ)] = &s - return s - }, - }) - if err != nil { - log.Fatal(err) - } - - nodes := buildNodes(s, schemas, ownFields) - err = buildMarkdown(nodes, outputPath, header) - if err != nil { - log.Fatal(err) - } - return nil -} - -func getPath(typ reflect.Type) string { - return typ.PkgPath() + "." 
+ typ.Name() -} - -func assignAnnotation(s *jsonschema.Schema, a annotation.Descriptor) { - if a.Description != "" && a.Description != annotation.Placeholder { - s.Description = a.Description - } - if a.MarkdownDescription != "" { - s.MarkdownDescription = a.MarkdownDescription - } - if a.MarkdownExamples != "" { - s.Examples = []string{a.MarkdownExamples} - } - if a.DeprecationMessage != "" { - s.Deprecated = true - s.DeprecationMessage = a.DeprecationMessage - } - if a.Preview == "PRIVATE" { - s.DoNotSuggest = true - s.Preview = a.Preview - } - if a.OutputOnly != nil && *a.OutputOnly { - s.DoNotSuggest = true - } -} - -func fillTemplateVariables(s string) string { - currentDate := time.Now().Format("2006-01-02") - return strings.ReplaceAll(s, "{{update_date}}", currentDate) -} diff --git a/bundle/docsgen/markdown.go b/bundle/docsgen/markdown.go deleted file mode 100644 index 4055e362bd..0000000000 --- a/bundle/docsgen/markdown.go +++ /dev/null @@ -1,111 +0,0 @@ -package main - -import ( - "fmt" - "log" - "os" - "strings" -) - -func buildMarkdown(nodes []rootNode, outputFile, header string) error { - m := newMardownRenderer() - m = m.PlainText(header) - for _, node := range nodes { - m = m.LF() - title := node.Title - if node.TopLevel { - m = m.H2(title) - } else { - m = m.H3(title) - } - m = m.LF() - - if node.Type != "" { - m = m.PlainText(fmt.Sprintf("**`Type: %s`**", node.Type)) - m = m.LF() - } - m = m.PlainText(node.Description) - m = m.LF() - - if len(node.ObjectKeyAttributes) > 0 { - n := pickLastWord(node.Title) - n = removePluralForm(n) - m = m.CodeBlocks("yaml", fmt.Sprintf("%ss:\n <%s-name>:\n <%s-field-name>: <%s-field-value>", n, n, n, n)) - m = m.LF() - m = buildAttributeTable(m, node.ObjectKeyAttributes) - } else if len(node.ArrayItemAttributes) > 0 { - m = m.LF() - m = buildAttributeTable(m, node.ArrayItemAttributes) - } else if len(node.Attributes) > 0 { - m = m.LF() - m = buildAttributeTable(m, node.Attributes) - } - - if node.Example != "" { - 
m = m.LF() - m = m.PlainText("**Example**") - m = m.LF() - m = m.PlainText(node.Example) - } - } - - f, err := os.Create(outputFile) - if err != nil { - log.Fatal(err) - } - _, err = f.WriteString(m.String()) - if err != nil { - log.Fatal(err) - } - return f.Close() -} - -func pickLastWord(s string) string { - words := strings.Split(s, ".") - return words[len(words)-1] -} - -// Build a custom table which we use in Databricks website -func buildAttributeTable(m *markdownRenderer, attributes []attributeNode) *markdownRenderer { - m = m.LF() - m = m.PlainText(":::list-table") - m = m.LF() - - m = m.PlainText("- - Key") - m = m.PlainText(" - Type") - m = m.PlainText(" - Description") - m = m.LF() - - for _, a := range attributes { - m = m.PlainText("- - " + fmt.Sprintf("`%s`", a.Title)) - m = m.PlainText(" - " + a.Type) - m = m.PlainText(" - " + formatDescription(a)) - m = m.LF() - } - - m = m.PlainText(":::") - m = m.LF() - - return m -} - -func formatDescription(a attributeNode) string { - s := strings.ReplaceAll(a.Description, "\n", " ") - if a.Link != "" { - if strings.HasSuffix(s, ".") { - s += " " - } else if s != "" { - s += ". 
" - } - s += fmt.Sprintf("See [\\_](#%s).", cleanAnchor(a.Link)) - } - return s -} - -// Docs framework does not allow special characters in anchor links and strip them out by default -// We need to clean them up to make sure the links pass the validation -func cleanAnchor(s string) string { - s = strings.ReplaceAll(s, ".", "") - s = strings.ReplaceAll(s, nameFieldWithFormat, nameField) - return s -} diff --git a/bundle/docsgen/markdown_test.go b/bundle/docsgen/markdown_test.go deleted file mode 100644 index 5463eeef3c..0000000000 --- a/bundle/docsgen/markdown_test.go +++ /dev/null @@ -1,42 +0,0 @@ -package main - -import ( - "path/filepath" - "testing" - - "github.com/databricks/cli/internal/testutil" - "github.com/stretchr/testify/require" -) - -func TestBuildMarkdownAnchors(t *testing.T) { - nodes := []rootNode{ - { - Title: "some_field", - TopLevel: true, - Type: "Map", - Description: "This is a description", - Attributes: []attributeNode{ - { - Title: "my_attribute", - Type: "Map", - Description: "Desc with link", - Link: "some_field._name_.my_attribute", - }, - }, - }, - { - Title: "some_field._name_.my_attribute", - TopLevel: false, - Type: "Boolean", - Description: "Another description", - }, - } - tmpDir := t.TempDir() - path := filepath.Join(tmpDir, "output.md") - - err := buildMarkdown(nodes, path, "Header") - require.NoError(t, err) - - expected := testutil.ReadFile(t, "testdata/anchors.md") - testutil.AssertFileContents(t, path, expected) -} diff --git a/bundle/docsgen/nodes.go b/bundle/docsgen/nodes.go deleted file mode 100644 index 41d37f338c..0000000000 --- a/bundle/docsgen/nodes.go +++ /dev/null @@ -1,246 +0,0 @@ -package main - -import ( - "sort" - "strings" - - "github.com/databricks/cli/libs/jsonschema" -) - -// rootNode is an intermediate representation of resolved JSON-schema item that is used to generate documentation -// Every schema node goes follows this conversion `JSON-schema -> rootNode -> markdown text` -type rootNode struct { - Title 
string - Description string - Attributes []attributeNode - Example string - ObjectKeyAttributes []attributeNode - ArrayItemAttributes []attributeNode - TopLevel bool - Type string -} - -type attributeNode struct { - Title string - Type string - Description string - Link string -} - -type rootProp struct { - // k is the name of the property - k string - // v is the corresponding json-schema node - v *jsonschema.Schema - // topLevel is true only for direct properties of the schema of root type (e.g. config.Root or config.Resources) - // Example: config.Root has . - topLevel bool - // circular indicates if property was added by recursive type, e.g. task.for_each_task.task.for_each_task - // These entries don't expand further and don't add any new nodes from their properties - circular bool -} - -const MapType = "Map" - -// buildNodes converts JSON-schema to a flat list of rootNode items that are then used to generate markdown documentation -// It recursively traverses the schema expanding the resulting list with new items for every properties of nodes `object` and `array` type -func buildNodes(s jsonschema.Schema, refs map[string]*jsonschema.Schema, ownFields map[string]bool) []rootNode { - var rootProps []rootProp - for k, v := range s.Properties { - rootProps = append(rootProps, rootProp{k, v, true, false}) - } - nodes := make([]rootNode, 0, len(rootProps)) - visited := make(map[string]bool) - - for i := 0; i < len(rootProps); i++ { - item := rootProps[i] - k := item.k - v := item.v - - if visited[k] { - continue - } - visited[k] = true - v = resolveRefs(v, refs) - - if v.Deprecated { - continue - } - if v.DoNotSuggest { - continue - } - - node := rootNode{ - Title: k, - Description: getDescription(v), - TopLevel: item.topLevel, - Example: getExample(v), - Type: getHumanReadableType(v.Type), - } - - hasProperties := len(v.Properties) > 0 - if hasProperties { - node.Attributes = getAttributes(v.Properties, refs, ownFields, k, item.circular) - } - - mapValueType := 
getMapValueType(v, refs) - if mapValueType != nil { - d := getDescription(mapValueType) - if d != "" { - node.Description = d - } - if node.Example == "" { - node.Example = getExample(mapValueType) - } - node.ObjectKeyAttributes = getAttributes(mapValueType.Properties, refs, ownFields, getMapKeyPrefix(k), item.circular) - } - - arrayItemType := resolveRefs(v.Items, refs) - if arrayItemType != nil { - node.ArrayItemAttributes = getAttributes(arrayItemType.Properties, refs, ownFields, k, item.circular) - } - - nodes = append(nodes, node) - - // Whether we should add new root props from the children of the current JSON-schema node to include their definitions to this document - shouldAddNewProps := !item.circular - if shouldAddNewProps { - var newProps []rootProp - // Adds node with definition for the properties. Example: - // bundle: - // prop-name: - if hasProperties { - newProps = append(newProps, extractNodes(k, v.Properties, refs, ownFields)...) - } - - // Adds node with definition for the type of array item. Example: - // permissions: - // - - if arrayItemType != nil { - newProps = append(newProps, extractNodes(k, arrayItemType.Properties, refs, ownFields)...) - } - // Adds node with definition for the type of the Map value. Example: - // targets: - // : - if mapValueType != nil { - newProps = append(newProps, extractNodes(getMapKeyPrefix(k), mapValueType.Properties, refs, ownFields)...) - } - - rootProps = append(rootProps, newProps...) - } - } - - sort.Slice(nodes, func(i, j int) bool { - return nodes[i].Title < nodes[j].Title - }) - return nodes -} - -func getMapValueType(v *jsonschema.Schema, refs map[string]*jsonschema.Schema) *jsonschema.Schema { - additionalProps, ok := v.AdditionalProperties.(*jsonschema.Schema) - if ok { - return resolveRefs(additionalProps, refs) - } - return nil -} - -const ( - nameField = "name" - nameFieldWithFormat = "_name_" -) - -func getMapKeyPrefix(s string) string { - return s + "." 
+ nameFieldWithFormat -} - -func removePluralForm(s string) string { - if strings.HasSuffix(s, "s") { - return strings.TrimSuffix(s, "s") - } - return s -} - -func getHumanReadableType(t jsonschema.Type) string { - typesMapping := map[string]string{ - "string": "String", - "integer": "Integer", - "boolean": "Boolean", - "array": "Sequence", - "object": "Map", - } - return typesMapping[string(t)] -} - -func getAttributes(props, refs map[string]*jsonschema.Schema, ownFields map[string]bool, prefix string, circular bool) []attributeNode { - var attributes []attributeNode - for k, v := range props { - v = resolveRefs(v, refs) - if v.DoNotSuggest { - continue - } - typeString := getHumanReadableType(v.Type) - if typeString == "" { - typeString = "Any" - } - var reference string - if isReferenceType(v, refs, ownFields) && !circular && !v.Deprecated { - reference = prefix + "." + k - } - attributes = append(attributes, attributeNode{ - Title: k, - Type: typeString, - Description: getDescription(v), - Link: reference, - }) - } - sort.Slice(attributes, func(i, j int) bool { - return attributes[i].Title < attributes[j].Title - }) - return attributes -} - -func getDescription(s *jsonschema.Schema) string { - if s.DeprecationMessage != "" { - return s.DeprecationMessage - } - if s.MarkdownDescription != "" { - return s.MarkdownDescription - } - return s.Description -} - -func shouldExtract(ref string, ownFields map[string]bool) bool { - if i := strings.Index(ref, "github.com"); i >= 0 { - ref = ref[i:] - } - _, isCustomField := ownFields[ref] - return isCustomField -} - -// extractNodes returns a list of rootProp items for all properties of the json-schema node that should be extracted based on context -// E.g. 
we extract all propert -func extractNodes(prefix string, props, refs map[string]*jsonschema.Schema, ownFields map[string]bool) []rootProp { - var nodes []rootProp - for k, v := range props { - if v.Reference != nil && !shouldExtract(*v.Reference, ownFields) { - continue - } - v = resolveRefs(v, refs) - if v.Type == "object" || v.Type == "array" { - nodes = append(nodes, rootProp{prefix + "." + k, v, false, isCycleField(k)}) - } - } - return nodes -} - -func isCycleField(field string) bool { - return field == "for_each_task" -} - -func getExample(v *jsonschema.Schema) string { - examples := getExamples(v.Examples) - if len(examples) == 0 { - return "" - } - return examples[0] -} diff --git a/bundle/docsgen/nodes_test.go b/bundle/docsgen/nodes_test.go deleted file mode 100644 index 028bf1eff6..0000000000 --- a/bundle/docsgen/nodes_test.go +++ /dev/null @@ -1,176 +0,0 @@ -package main - -import ( - "testing" - - "github.com/databricks/cli/libs/jsonschema" - "github.com/stretchr/testify/assert" -) - -func TestBuildNodes_ChildExpansion(t *testing.T) { - tests := []struct { - name string - schema jsonschema.Schema - refs map[string]*jsonschema.Schema - ownFields map[string]bool - wantNodes []rootNode - }{ - { - name: "array expansion", - schema: jsonschema.Schema{ - Type: "object", - Properties: map[string]*jsonschema.Schema{ - "list": { - Type: "array", - Items: &jsonschema.Schema{ - Type: "object", - Properties: map[string]*jsonschema.Schema{ - "listSub": {Reference: strPtr("#/$defs/github.com/listSub")}, - }, - }, - }, - }, - }, - refs: map[string]*jsonschema.Schema{ - "github.com/listSub": {Type: "array", Items: &jsonschema.Schema{Type: "object", Properties: map[string]*jsonschema.Schema{"subField": {Type: "string"}}}}, - }, - ownFields: map[string]bool{"github.com/listSub": true}, - wantNodes: []rootNode{ - { - Title: "list", - TopLevel: true, - Type: "Sequence", - ArrayItemAttributes: []attributeNode{ - {Title: "listSub", Type: "Sequence", Link: "list.listSub"}, - 
}, - }, - { - Title: "list.listSub", - Type: "Sequence", - ArrayItemAttributes: []attributeNode{ - {Title: "subField", Type: "String"}, - }, - }, - }, - }, - { - name: "map expansion", - schema: jsonschema.Schema{ - Type: "object", - Properties: map[string]*jsonschema.Schema{ - "myMap": { - Type: "object", - AdditionalProperties: &jsonschema.Schema{ - Reference: strPtr("#/$defs/github.com/myMap"), - Properties: map[string]*jsonschema.Schema{ - "mapSub": {Type: "object", Reference: strPtr("#/$defs/github.com/mapSub")}, - }, - }, - }, - }, - }, - refs: map[string]*jsonschema.Schema{ - "github.com/myMap": { - Type: "object", - Properties: map[string]*jsonschema.Schema{ - "mapSub": {Type: "boolean", Reference: strPtr("#/$defs/github.com/mapSub")}, - }, - }, - "github.com/mapSub": { - Type: "object", - Properties: map[string]*jsonschema.Schema{ - "deepSub": {Type: "boolean"}, - }, - }, - }, - ownFields: map[string]bool{ - "github.com/myMap": true, - "github.com/mapSub": true, - }, - wantNodes: []rootNode{ - { - Title: "myMap", - TopLevel: true, - Type: "Map", - ObjectKeyAttributes: []attributeNode{ - {Title: "mapSub", Type: "Map", Link: "myMap._name_.mapSub"}, - }, - }, - { - Title: "myMap._name_.mapSub", - Type: "Map", - Attributes: []attributeNode{ - {Title: "deepSub", Type: "Boolean"}, - }, - }, - }, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - got := buildNodes(tt.schema, tt.refs, tt.ownFields) - assert.Equal(t, tt.wantNodes, got) - }) - } -} - -func TestDeprecatedFields(t *testing.T) { - s := jsonschema.Schema{ - Type: "object", - Properties: map[string]*jsonschema.Schema{ - "deprecatedField": {Deprecated: true}, - "notDeprecatedField": { - Properties: map[string]*jsonschema.Schema{ - "nestedDeprecatedField": { - Deprecated: true, - Description: "nested description", - Extension: jsonschema.Extension{ - DeprecationMessage: "nested deprecation message", - }, - }, - "nestedNotDeprecatedField": {}, - }, - }, - }, - } - nodes := 
buildNodes(s, nil, nil) - assert.Len(t, nodes, 1) - assert.Equal(t, "notDeprecatedField", nodes[0].Title) - - assert.Len(t, nodes[0].Attributes, 2) - assert.Equal(t, "nested deprecation message", nodes[0].Attributes[0].Description) -} - -func TestDoNotSuggestFields(t *testing.T) { - s := jsonschema.Schema{ - Type: "object", - Properties: map[string]*jsonschema.Schema{ - "doNotSuggestField": {Extension: jsonschema.Extension{DoNotSuggest: true}}, - "notDoNotSuggestField": { - Properties: map[string]*jsonschema.Schema{ - "nestedDoNotSuggestField": { - Description: "nested description", - Extension: jsonschema.Extension{ - DeprecationMessage: "nested do message", - DoNotSuggest: true, - }, - }, - "nestedNotDoNotSuggestField": { - Description: "nested suggested field", - }, - }, - }, - }, - } - nodes := buildNodes(s, nil, nil) - assert.Len(t, nodes, 1) - assert.Equal(t, "notDoNotSuggestField", nodes[0].Title) - - assert.Len(t, nodes[0].Attributes, 1) - assert.Equal(t, "nestedNotDoNotSuggestField", nodes[0].Attributes[0].Title) -} - -func strPtr(s string) *string { - return &s -} diff --git a/bundle/docsgen/output/reference.md b/bundle/docsgen/output/reference.md deleted file mode 100644 index 97449aca78..0000000000 --- a/bundle/docsgen/output/reference.md +++ /dev/null @@ -1,1783 +0,0 @@ ---- -description: 'Configuration reference for databricks.yml' -last_update: - date: 2025-09-13 ---- - - - -# Configuration reference - -This article provides reference for keys supported by :re[DABS] configuration (YAML). See [\_](/dev-tools/bundles/index.md). - -For complete bundle examples, see [\_](/dev-tools/bundles/resource-examples.md) and the [bundle-examples GitHub repository](https://github.com/databricks/bundle-examples). - - -## artifacts - -**`Type: Map`** - -Defines the attributes to build artifacts, where each key is the name of the artifact, and the value is a Map that defines the artifact build settings. 
For information about the `artifacts` mapping, see [\_](/dev-tools/bundles/settings.md#artifacts). - -Artifact settings defined in the top level of the bundle configuration can be overridden in the `targets` mapping. See [\_](/dev-tools/bundles/artifact-overrides.md). - -```yaml -artifacts: - : - : -``` - - -:::list-table - -- - Key - - Type - - Description - -- - `build` - - String - - An optional set of build commands to run locally before deployment. - -- - `dynamic_version` - - Boolean - - Whether to patch the wheel version dynamically based on the timestamp of the whl file. If this is set to `true`, new code can be deployed without having to update the version in `setup.py` or `pyproject.toml`. This setting is only valid when `type` is set to `whl`. See [\_](/dev-tools/bundles/settings.md#bundle-syntax-mappings-artifacts). - -- - `executable` - - String - - The executable type. Valid values are `bash`, `sh`, and `cmd`. - -- - `files` - - Sequence - - The relative or absolute path to the built artifact files. See [\_](#artifactsnamefiles). - -- - `path` - - String - - The local path of the directory for the artifact. - -- - `type` - - String - - Required if the artifact is a Python wheel. The type of the artifact. Valid values are `whl` and `jar`. - -::: - - -**Example** - -```yaml -artifacts: - default: - type: whl - build: poetry build - path: . -``` - -### artifacts._name_.files - -**`Type: Sequence`** - -The relative or absolute path to the built artifact files. - - - -:::list-table - -- - Key - - Type - - Description - -- - `source` - - String - - Required. The artifact source file. - -::: - - -## bundle - -**`Type: Map`** - -The bundle attributes when deploying to this target, - - - -:::list-table - -- - Key - - Type - - Description - -- - `cluster_id` - - String - - The ID of a cluster to use to run the bundle. See [\_](/dev-tools/bundles/settings.md#cluster_id). - -- - `compute_id` - - String - - Deprecated. 
The ID of the compute to use to run the bundle. - -- - `databricks_cli_version` - - String - - The Databricks CLI version to use for the bundle. See [\_](/dev-tools/bundles/settings.md#databricks_cli_version). - -- - `deployment` - - Map - - The definition of the bundle deployment. For supported attributes see [\_](/dev-tools/bundles/deployment-modes.md). See [\_](#bundledeployment). - -- - `git` - - Map - - The Git version control details that are associated with your bundle. For supported attributes see [\_](/dev-tools/bundles/settings.md#git). See [\_](#bundlegit). - -- - `name` - - String - - The name of the bundle. - -- - `uuid` - - String - - Reserved. A Universally Unique Identifier (UUID) for the bundle that uniquely identifies the bundle in internal Databricks systems. This is generated when a bundle project is initialized using a Databricks template (using the `databricks bundle init` command). - -::: - - -### bundle.deployment - -**`Type: Map`** - -The definition of the bundle deployment. For supported attributes see [\_](/dev-tools/bundles/deployment-modes.md). - - - -:::list-table - -- - Key - - Type - - Description - -- - `fail_on_active_runs` - - Boolean - - Whether to fail on active runs. If this is set to true a deployment that is running can be interrupted. - -- - `lock` - - Map - - The deployment lock attributes. See [\_](#bundledeploymentlock). - -::: - - -### bundle.deployment.lock - -**`Type: Map`** - -The deployment lock attributes. - - - -:::list-table - -- - Key - - Type - - Description - -- - `enabled` - - Boolean - - Whether this lock is enabled. - -- - `force` - - Boolean - - Whether to force this lock if it is enabled. - -::: - - -### bundle.git - -**`Type: Map`** - -The Git version control details that are associated with your bundle. For supported attributes see [\_](/dev-tools/bundles/settings.md#git). - - - -:::list-table - -- - Key - - Type - - Description - -- - `branch` - - String - - The Git branch name. 
See [\_](/dev-tools/bundles/settings.md#git). - -- - `origin_url` - - String - - The origin URL of the repository. See [\_](/dev-tools/bundles/settings.md#git). - -::: - - -## experimental - -**`Type: Map`** - -Defines attributes for experimental features. - - - -:::list-table - -- - Key - - Type - - Description - -- - `pydabs` - - Map - - Deprecated: please use python instead - -- - `python` - - Map - - Configures loading of Python code defined with 'databricks-bundles' package. See [\_](#experimentalpython). - -- - `python_wheel_wrapper` - - Boolean - - Whether to use a Python wheel wrapper. - -- - `scripts` - - Map - - The commands to run. - -- - `skip_artifact_cleanup` - - Boolean - - Determines whether to skip cleaning up the .internal folder - -- - `skip_name_prefix_for_schema` - - Boolean - - Skip adding the prefix that is either set in `presets.name_prefix` or computed when `mode: development` is set, to the names of UC schemas defined in the bundle. - -- - `use_legacy_run_as` - - Boolean - - Whether to use the legacy run_as behavior. - -::: - - -### experimental.python - -**`Type: Map`** - -Configures loading of Python code defined with 'databricks-bundles' package. - - - -:::list-table - -- - Key - - Type - - Description - -- - `mutators` - - Sequence - - Mutators contains a list of fully qualified function paths to mutator functions. Example: ["my_project.mutators:add_default_cluster"] - -- - `resources` - - Sequence - - Resources contains a list of fully qualified function paths to load resources defined in Python code. Example: ["my_project.resources:load_resources"] - -- - `venv_path` - - String - - VEnvPath is path to the virtual environment. If enabled, Python code will execute within this environment. If disabled, it defaults to using the Python interpreter available in the current shell. - -::: - - -## include - -**`Type: Sequence`** - -Specifies a list of path globs that contain configuration files to include within the bundle. 
See [\_](/dev-tools/bundles/settings.md#include). - - -## permissions - -**`Type: Sequence`** - -A Sequence that defines the permissions to apply to experiments, jobs, pipelines, and models defined in the bundle, where each item in the sequence is a permission for a specific entity. - -See [\_](/dev-tools/bundles/settings.md#permissions) and [\_](/dev-tools/bundles/permissions.md). - - - -:::list-table - -- - Key - - Type - - Description - -- - `group_name` - - String - - The name of the group that has the permission set in level. - -- - `level` - - String - - The allowed permission for user, group, service principal defined for this permission. - -- - `service_principal_name` - - String - - The name of the service principal that has the permission set in level. - -- - `user_name` - - String - - The name of the user that has the permission set in level. - -::: - - -**Example** - -```yaml -permissions: - - level: CAN_VIEW - group_name: test-group - - level: CAN_MANAGE - user_name: someone@example.com - - level: CAN_RUN - service_principal_name: 123456-abcdef -``` - -## presets - -**`Type: Map`** - -Defines bundle deployment presets. See [\_](/dev-tools/bundles/deployment-modes.md#presets). - - - -:::list-table - -- - Key - - Type - - Description - -- - `artifacts_dynamic_version` - - Boolean - - Whether to enable dynamic_version on all artifacts. - -- - `jobs_max_concurrent_runs` - - Integer - - The maximum concurrent runs for a job. - -- - `name_prefix` - - String - - The prefix for job runs of the bundle. - -- - `pipelines_development` - - Boolean - - Whether pipeline deployments should be locked in development mode. - -- - `source_linked_deployment` - - Boolean - - Whether to link the deployment to the bundle source. - -- - `tags` - - Map - - The tags for the bundle deployment. - -- - `trigger_pause_status` - - String - - A pause status to apply to all job triggers and schedules. Valid values are PAUSED or UNPAUSED. 
- -::: - - -## resources - -**`Type: Map`** - -A Map that defines the resources for the bundle, where each key is the name of the resource, and the value is a Map that defines the resource. For more information about Databricks Asset Bundles supported resources, and resource definition reference, see [\_](/dev-tools/bundles/resources.md). - -```yaml -resources: - : - : - : -``` - - - -:::list-table - -- - Key - - Type - - Description - -- - `apps` - - Map - - The app resource defines a [Databricks app](/api/workspace/apps/create). For information about Databricks Apps, see [\_](/dev-tools/databricks-apps/index.md). - -- - `clusters` - - Map - - The cluster definitions for the bundle, where each key is the name of a cluster. See [\_](/dev-tools/bundles/resources.md#clusters). - -- - `dashboards` - - Map - - The dashboard definitions for the bundle, where each key is the name of the dashboard. See [\_](/dev-tools/bundles/resources.md#dashboards). - -- - `database_catalogs` - - Map - - - -- - `database_instances` - - Map - - - -- - `experiments` - - Map - - The experiment definitions for the bundle, where each key is the name of the experiment. See [\_](/dev-tools/bundles/resources.md#experiments). - -- - `jobs` - - Map - - The job definitions for the bundle, where each key is the name of the job. See [\_](/dev-tools/bundles/resources.md#jobs). - -- - `model_serving_endpoints` - - Map - - The model serving endpoint definitions for the bundle, where each key is the name of the model serving endpoint. See [\_](/dev-tools/bundles/resources.md#model_serving_endpoints). - -- - `models` - - Map - - The model definitions for the bundle, where each key is the name of the model. See [\_](/dev-tools/bundles/resources.md#models). - -- - `pipelines` - - Map - - The pipeline definitions for the bundle, where each key is the name of the pipeline. See [\_](/dev-tools/bundles/resources.md#pipelines). 
- -- - `quality_monitors` - - Map - - The quality monitor definitions for the bundle, where each key is the name of the quality monitor. See [\_](/dev-tools/bundles/resources.md#quality_monitors). - -- - `registered_models` - - Map - - The registered model definitions for the bundle, where each key is the name of the Unity Catalog registered model. See [\_](/dev-tools/bundles/resources.md#registered_models) - -- - `schemas` - - Map - - The schema definitions for the bundle, where each key is the name of the schema. See [\_](/dev-tools/bundles/resources.md#schemas). - -- - `secret_scopes` - - Map - - The secret scope definitions for the bundle, where each key is the name of the secret scope. See [\_](/dev-tools/bundles/resources.md#secret_scopes). See [\_](#resourcessecret_scopes). - -- - `sql_warehouses` - - Map - - The SQL warehouse definitions for the bundle, where each key is the name of the warehouse. See [\_](/dev-tools/bundles/resources.md#sql_warehouses). - -- - `synced_database_tables` - - Map - - See [\_](#resourcessynced_database_tables). - -- - `volumes` - - Map - - The volume definitions for the bundle, where each key is the name of the volume. See [\_](/dev-tools/bundles/resources.md#volumes). - -::: - - -### resources.secret_scopes - -**`Type: Map`** - -The secret scope definitions for the bundle, where each key is the name of the secret scope. See [\_](/dev-tools/bundles/resources.md#secret_scopes). - -```yaml -secret_scopes: - : - : -``` - - -:::list-table - -- - Key - - Type - - Description - -- - `backend_type` - - String - - The backend type the scope will be created with. If not specified, will default to `DATABRICKS` - -- - `keyvault_metadata` - - Map - - The metadata for the secret scope if the `backend_type` is `AZURE_KEYVAULT`. See [\_](#resourcessecret_scopesnamekeyvault_metadata). - -- - `lifecycle` - - Map - - Lifecycle is a struct that contains the lifecycle settings for a resource. 
It controls the behavior of the resource when it is deployed or destroyed. See [\_](#resourcessecret_scopesnamelifecycle). - -- - `name` - - String - - Scope name requested by the user. Scope names are unique. - -- - `permissions` - - Sequence - - The permissions to apply to the secret scope. Permissions are managed via secret scope ACLs. See [\_](#resourcessecret_scopesnamepermissions). - -::: - - -### resources.secret_scopes._name_.lifecycle - -**`Type: Map`** - -Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. - - - -:::list-table - -- - Key - - Type - - Description - -- - `prevent_destroy` - - Boolean - - Lifecycle setting to prevent the resource from being destroyed. - -::: - - -### resources.secret_scopes._name_.permissions - -**`Type: Sequence`** - -The permissions to apply to the secret scope. Permissions are managed via secret scope ACLs. - - - -:::list-table - -- - Key - - Type - - Description - -- - `group_name` - - String - - The name of the group that has the permission set in level. This field translates to a `principal` field in secret scope ACL. - -- - `level` - - String - - The allowed permission for user, group, service principal defined for this permission. - -- - `service_principal_name` - - String - - The application ID of an active service principal. This field translates to a `principal` field in secret scope ACL. - -- - `user_name` - - String - - The name of the user that has the permission set in level. This field translates to a `principal` field in secret scope ACL. - -::: - - -### resources.synced_database_tables - -**`Type: Map`** - - - -```yaml -synced_database_tables: - : - : -``` - - -:::list-table - -- - Key - - Type - - Description - -- - `data_synchronization_status` - - Map - - See [\_](#resourcessynced_database_tablesnamedata_synchronization_status). 
- -- - `database_instance_name` - - String - - - -- - `effective_database_instance_name` - - String - - - -- - `effective_logical_database_name` - - String - - - -- - `lifecycle` - - Map - - Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. See [\_](#resourcessynced_database_tablesnamelifecycle). - -- - `logical_database_name` - - String - - - -- - `name` - - String - - - -- - `spec` - - Map - - See [\_](#resourcessynced_database_tablesnamespec). - -- - `unity_catalog_provisioning_state` - - String - - - -::: - - -### resources.synced_database_tables._name_.lifecycle - -**`Type: Map`** - -Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. - - - -:::list-table - -- - Key - - Type - - Description - -- - `prevent_destroy` - - Boolean - - Lifecycle setting to prevent the resource from being destroyed. - -::: - - -## run_as - -**`Type: Map`** - -The identity to use when running Databricks Asset Bundles workflows. See [\_](/dev-tools/bundles/run-as.md). - - - -:::list-table - -- - Key - - Type - - Description - -- - `service_principal_name` - - String - - The application ID of an active service principal. Setting this field requires the `servicePrincipal/user` role. - -- - `user_name` - - String - - The email of an active workspace user. Non-admin users can only set this field to their own email. - -::: - - -## scripts - -**`Type: Map`** - - - -```yaml -scripts: - : - : -``` - - -:::list-table - -- - Key - - Type - - Description - -- - `content` - - String - - - -::: - - -## sync - -**`Type: Map`** - -The files and file paths to include or exclude in the bundle. See [\_](/dev-tools/bundles/settings.md#sync). - - - -:::list-table - -- - Key - - Type - - Description - -- - `exclude` - - Sequence - - A list of files or folders to exclude from the bundle. 
- -- - `include` - - Sequence - - A list of files or folders to include in the bundle. - -- - `paths` - - Sequence - - The local folder paths, which can be outside the bundle root, to synchronize to the workspace when the bundle is deployed. - -::: - - -## targets - -**`Type: Map`** - -Defines deployment targets for the bundle. See [\_](/dev-tools/bundles/settings.md#targets) - -```yaml -targets: - : - : -``` - - -:::list-table - -- - Key - - Type - - Description - -- - `artifacts` - - Map - - The artifacts to include in the target deployment. See [\_](#targetsnameartifacts). - -- - `bundle` - - Map - - The bundle attributes when deploying to this target. See [\_](#targetsnamebundle). - -- - `cluster_id` - - String - - The ID of the cluster to use for this target. - -- - `compute_id` - - String - - Deprecated: please use cluster_id instead - -- - `default` - - Boolean - - Whether this target is the default target. - -- - `git` - - Map - - The Git version control settings for the target. See [\_](#targetsnamegit). - -- - `mode` - - String - - The deployment mode for the target. Valid values are `development` or `production`. See [\_](/dev-tools/bundles/deployment-modes.md). - -- - `permissions` - - Sequence - - The permissions for deploying and running the bundle in the target. See [\_](#targetsnamepermissions). - -- - `presets` - - Map - - The deployment presets for the target. See [\_](#targetsnamepresets). - -- - `resources` - - Map - - The resource definitions for the target. See [\_](#targetsnameresources). - -- - `run_as` - - Map - - The identity to use to run the bundle, see [\_](/dev-tools/bundles/run-as.md). See [\_](#targetsnamerun_as). - -- - `sync` - - Map - - The local paths to sync to the target workspace when a bundle is run or deployed. See [\_](#targetsnamesync). - -- - `variables` - - Map - - The custom variable definitions for the target. See [\_](#targetsnamevariables). - -- - `workspace` - - Map - - The Databricks workspace for the target. 
See [\_](#targetsnameworkspace). - -::: - - -### targets._name_.artifacts - -**`Type: Map`** - -The artifacts to include in the target deployment. - -```yaml -artifacts: - : - : -``` - - -:::list-table - -- - Key - - Type - - Description - -- - `build` - - String - - An optional set of build commands to run locally before deployment. - -- - `dynamic_version` - - Boolean - - Whether to patch the wheel version dynamically based on the timestamp of the whl file. If this is set to `true`, new code can be deployed without having to update the version in `setup.py` or `pyproject.toml`. This setting is only valid when `type` is set to `whl`. See [\_](/dev-tools/bundles/settings.md#bundle-syntax-mappings-artifacts). - -- - `executable` - - String - - The executable type. Valid values are `bash`, `sh`, and `cmd`. - -- - `files` - - Sequence - - The relative or absolute path to the built artifact files. See [\_](#targetsnameartifactsnamefiles). - -- - `path` - - String - - The local path of the directory for the artifact. - -- - `type` - - String - - Required if the artifact is a Python wheel. The type of the artifact. Valid values are `whl` and `jar`. - -::: - - -### targets._name_.artifacts._name_.files - -**`Type: Sequence`** - -The relative or absolute path to the built artifact files. - - - -:::list-table - -- - Key - - Type - - Description - -- - `source` - - String - - Required. The artifact source file. - -::: - - -### targets._name_.bundle - -**`Type: Map`** - -The bundle attributes when deploying to this target. - - - -:::list-table - -- - Key - - Type - - Description - -- - `cluster_id` - - String - - The ID of a cluster to use to run the bundle. See [\_](/dev-tools/bundles/settings.md#cluster_id). - -- - `compute_id` - - String - - Deprecated. The ID of the compute to use to run the bundle. - -- - `databricks_cli_version` - - String - - The Databricks CLI version to use for the bundle. See [\_](/dev-tools/bundles/settings.md#databricks_cli_version). 
- -- - `deployment` - - Map - - The definition of the bundle deployment. For supported attributes see [\_](/dev-tools/bundles/deployment-modes.md). See [\_](#targetsnamebundledeployment). - -- - `git` - - Map - - The Git version control details that are associated with your bundle. For supported attributes see [\_](/dev-tools/bundles/settings.md#git). See [\_](#targetsnamebundlegit). - -- - `name` - - String - - The name of the bundle. - -- - `uuid` - - String - - Reserved. A Universally Unique Identifier (UUID) for the bundle that uniquely identifies the bundle in internal Databricks systems. This is generated when a bundle project is initialized using a Databricks template (using the `databricks bundle init` command). - -::: - - -### targets._name_.bundle.deployment - -**`Type: Map`** - -The definition of the bundle deployment. For supported attributes see [\_](/dev-tools/bundles/deployment-modes.md). - - - -:::list-table - -- - Key - - Type - - Description - -- - `fail_on_active_runs` - - Boolean - - Whether to fail on active runs. If this is set to true a deployment that is running can be interrupted. - -- - `lock` - - Map - - The deployment lock attributes. See [\_](#targetsnamebundledeploymentlock). - -::: - - -### targets._name_.bundle.deployment.lock - -**`Type: Map`** - -The deployment lock attributes. - - - -:::list-table - -- - Key - - Type - - Description - -- - `enabled` - - Boolean - - Whether this lock is enabled. - -- - `force` - - Boolean - - Whether to force this lock if it is enabled. - -::: - - -### targets._name_.bundle.git - -**`Type: Map`** - -The Git version control details that are associated with your bundle. For supported attributes see [\_](/dev-tools/bundles/settings.md#git). - - - -:::list-table - -- - Key - - Type - - Description - -- - `branch` - - String - - The Git branch name. See [\_](/dev-tools/bundles/settings.md#git). - -- - `origin_url` - - String - - The origin URL of the repository. 
See [\_](/dev-tools/bundles/settings.md#git). - -::: - - -### targets._name_.git - -**`Type: Map`** - -The Git version control settings for the target. - - - -:::list-table - -- - Key - - Type - - Description - -- - `branch` - - String - - The Git branch name. See [\_](/dev-tools/bundles/settings.md#git). - -- - `origin_url` - - String - - The origin URL of the repository. See [\_](/dev-tools/bundles/settings.md#git). - -::: - - -### targets._name_.permissions - -**`Type: Sequence`** - -The permissions for deploying and running the bundle in the target. - - - -:::list-table - -- - Key - - Type - - Description - -- - `group_name` - - String - - The name of the group that has the permission set in level. - -- - `level` - - String - - The allowed permission for user, group, service principal defined for this permission. - -- - `service_principal_name` - - String - - The name of the service principal that has the permission set in level. - -- - `user_name` - - String - - The name of the user that has the permission set in level. - -::: - - -### targets._name_.presets - -**`Type: Map`** - -The deployment presets for the target. - - - -:::list-table - -- - Key - - Type - - Description - -- - `artifacts_dynamic_version` - - Boolean - - Whether to enable dynamic_version on all artifacts. - -- - `jobs_max_concurrent_runs` - - Integer - - The maximum concurrent runs for a job. - -- - `name_prefix` - - String - - The prefix for job runs of the bundle. - -- - `pipelines_development` - - Boolean - - Whether pipeline deployments should be locked in development mode. - -- - `source_linked_deployment` - - Boolean - - Whether to link the deployment to the bundle source. - -- - `tags` - - Map - - The tags for the bundle deployment. - -- - `trigger_pause_status` - - String - - A pause status to apply to all job triggers and schedules. Valid values are PAUSED or UNPAUSED. - -::: - - -### targets._name_.resources - -**`Type: Map`** - -The resource definitions for the target. 
- - - -:::list-table - -- - Key - - Type - - Description - -- - `apps` - - Map - - The app resource defines a [Databricks app](/api/workspace/apps/create). For information about Databricks Apps, see [\_](/dev-tools/databricks-apps/index.md). - -- - `clusters` - - Map - - The cluster definitions for the bundle, where each key is the name of a cluster. See [\_](/dev-tools/bundles/resources.md#clusters). - -- - `dashboards` - - Map - - The dashboard definitions for the bundle, where each key is the name of the dashboard. See [\_](/dev-tools/bundles/resources.md#dashboards). - -- - `database_catalogs` - - Map - - - -- - `database_instances` - - Map - - - -- - `experiments` - - Map - - The experiment definitions for the bundle, where each key is the name of the experiment. See [\_](/dev-tools/bundles/resources.md#experiments). - -- - `jobs` - - Map - - The job definitions for the bundle, where each key is the name of the job. See [\_](/dev-tools/bundles/resources.md#jobs). - -- - `model_serving_endpoints` - - Map - - The model serving endpoint definitions for the bundle, where each key is the name of the model serving endpoint. See [\_](/dev-tools/bundles/resources.md#model_serving_endpoints). - -- - `models` - - Map - - The model definitions for the bundle, where each key is the name of the model. See [\_](/dev-tools/bundles/resources.md#models). - -- - `pipelines` - - Map - - The pipeline definitions for the bundle, where each key is the name of the pipeline. See [\_](/dev-tools/bundles/resources.md#pipelines). - -- - `quality_monitors` - - Map - - The quality monitor definitions for the bundle, where each key is the name of the quality monitor. See [\_](/dev-tools/bundles/resources.md#quality_monitors). - -- - `registered_models` - - Map - - The registered model definitions for the bundle, where each key is the name of the Unity Catalog registered model. 
See [\_](/dev-tools/bundles/resources.md#registered_models) - -- - `schemas` - - Map - - The schema definitions for the bundle, where each key is the name of the schema. See [\_](/dev-tools/bundles/resources.md#schemas). - -- - `secret_scopes` - - Map - - The secret scope definitions for the bundle, where each key is the name of the secret scope. See [\_](/dev-tools/bundles/resources.md#secret_scopes). See [\_](#targetsnameresourcessecret_scopes). - -- - `sql_warehouses` - - Map - - The SQL warehouse definitions for the bundle, where each key is the name of the warehouse. See [\_](/dev-tools/bundles/resources.md#sql_warehouses). - -- - `synced_database_tables` - - Map - - See [\_](#targetsnameresourcessynced_database_tables). - -- - `volumes` - - Map - - The volume definitions for the bundle, where each key is the name of the volume. See [\_](/dev-tools/bundles/resources.md#volumes). - -::: - - -### targets._name_.resources.secret_scopes - -**`Type: Map`** - -The secret scope definitions for the bundle, where each key is the name of the secret scope. See [\_](/dev-tools/bundles/resources.md#secret_scopes). - -```yaml -secret_scopes: - : - : -``` - - -:::list-table - -- - Key - - Type - - Description - -- - `backend_type` - - String - - The backend type the scope will be created with. If not specified, will default to `DATABRICKS` - -- - `keyvault_metadata` - - Map - - The metadata for the secret scope if the `backend_type` is `AZURE_KEYVAULT`. See [\_](#targetsnameresourcessecret_scopesnamekeyvault_metadata). - -- - `lifecycle` - - Map - - Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. See [\_](#targetsnameresourcessecret_scopesnamelifecycle). - -- - `name` - - String - - Scope name requested by the user. Scope names are unique. - -- - `permissions` - - Sequence - - The permissions to apply to the secret scope. Permissions are managed via secret scope ACLs. 
See [\_](#targetsnameresourcessecret_scopesnamepermissions). - -::: - - -### targets._name_.resources.secret_scopes._name_.lifecycle - -**`Type: Map`** - -Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. - - - -:::list-table - -- - Key - - Type - - Description - -- - `prevent_destroy` - - Boolean - - Lifecycle setting to prevent the resource from being destroyed. - -::: - - -### targets._name_.resources.secret_scopes._name_.permissions - -**`Type: Sequence`** - -The permissions to apply to the secret scope. Permissions are managed via secret scope ACLs. - - - -:::list-table - -- - Key - - Type - - Description - -- - `group_name` - - String - - The name of the group that has the permission set in level. This field translates to a `principal` field in secret scope ACL. - -- - `level` - - String - - The allowed permission for user, group, service principal defined for this permission. - -- - `service_principal_name` - - String - - The application ID of an active service principal. This field translates to a `principal` field in secret scope ACL. - -- - `user_name` - - String - - The name of the user that has the permission set in level. This field translates to a `principal` field in secret scope ACL. - -::: - - -### targets._name_.resources.synced_database_tables - -**`Type: Map`** - - - -```yaml -synced_database_tables: - : - : -``` - - -:::list-table - -- - Key - - Type - - Description - -- - `data_synchronization_status` - - Map - - See [\_](#targetsnameresourcessynced_database_tablesnamedata_synchronization_status). - -- - `database_instance_name` - - String - - - -- - `effective_database_instance_name` - - String - - - -- - `effective_logical_database_name` - - String - - - -- - `lifecycle` - - Map - - Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. 
See [\_](#targetsnameresourcessynced_database_tablesnamelifecycle). - -- - `logical_database_name` - - String - - - -- - `name` - - String - - - -- - `spec` - - Map - - See [\_](#targetsnameresourcessynced_database_tablesnamespec). - -- - `unity_catalog_provisioning_state` - - String - - - -::: - - -### targets._name_.resources.synced_database_tables._name_.lifecycle - -**`Type: Map`** - -Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. - - - -:::list-table - -- - Key - - Type - - Description - -- - `prevent_destroy` - - Boolean - - Lifecycle setting to prevent the resource from being destroyed. - -::: - - -### targets._name_.run_as - -**`Type: Map`** - -The identity to use to run the bundle, see [\_](/dev-tools/bundles/run-as.md). - - - -:::list-table - -- - Key - - Type - - Description - -- - `service_principal_name` - - String - - The application ID of an active service principal. Setting this field requires the `servicePrincipal/user` role. - -- - `user_name` - - String - - The email of an active workspace user. Non-admin users can only set this field to their own email. - -::: - - -### targets._name_.sync - -**`Type: Map`** - -The local paths to sync to the target workspace when a bundle is run or deployed. - - - -:::list-table - -- - Key - - Type - - Description - -- - `exclude` - - Sequence - - A list of files or folders to exclude from the bundle. - -- - `include` - - Sequence - - A list of files or folders to include in the bundle. - -- - `paths` - - Sequence - - The local folder paths, which can be outside the bundle root, to synchronize to the workspace when the bundle is deployed. - -::: - - -### targets._name_.variables - -**`Type: Map`** - -The custom variable definitions for the target. - -```yaml -variables: - : - : -``` - - -:::list-table - -- - Key - - Type - - Description - -- - `default` - - Any - - The default value for the variable. 
- -- - `description` - - String - - The description of the variable. - -- - `lookup` - - Map - - The name of the alert, cluster_policy, cluster, dashboard, instance_pool, job, metastore, pipeline, query, service_principal, or warehouse object for which to retrieve an ID. See [\_](#targetsnamevariablesnamelookup). - -- - `type` - - String - - The type of the variable. - -::: - - -### targets._name_.variables._name_.lookup - -**`Type: Map`** - -The name of the alert, cluster_policy, cluster, dashboard, instance_pool, job, metastore, pipeline, query, service_principal, or warehouse object for which to retrieve an ID. - - - -:::list-table - -- - Key - - Type - - Description - -- - `alert` - - String - - The name of the alert for which to retrieve an ID. - -- - `cluster` - - String - - The name of the cluster for which to retrieve an ID. - -- - `cluster_policy` - - String - - The name of the cluster_policy for which to retrieve an ID. - -- - `dashboard` - - String - - The name of the dashboard for which to retrieve an ID. - -- - `instance_pool` - - String - - The name of the instance_pool for which to retrieve an ID. - -- - `job` - - String - - The name of the job for which to retrieve an ID. - -- - `metastore` - - String - - The name of the metastore for which to retrieve an ID. - -- - `notification_destination` - - String - - The name of the notification_destination for which to retrieve an ID. - -- - `pipeline` - - String - - The name of the pipeline for which to retrieve an ID. - -- - `query` - - String - - The name of the query for which to retrieve an ID. - -- - `service_principal` - - String - - The name of the service_principal for which to retrieve an ID. - -- - `warehouse` - - String - - The name of the warehouse for which to retrieve an ID. - -::: - - -### targets._name_.workspace - -**`Type: Map`** - -The Databricks workspace for the target. 
- - - -:::list-table - -- - Key - - Type - - Description - -- - `artifact_path` - - String - - The artifact path to use within the workspace for both deployments and workflow runs - -- - `auth_type` - - String - - The authentication type. - -- - `azure_client_id` - - String - - The Azure client ID - -- - `azure_environment` - - String - - The Azure environment - -- - `azure_login_app_id` - - String - - The Azure login app ID - -- - `azure_tenant_id` - - String - - The Azure tenant ID - -- - `azure_use_msi` - - Boolean - - Whether to use MSI for Azure - -- - `azure_workspace_resource_id` - - String - - The Azure workspace resource ID - -- - `client_id` - - String - - The client ID for the workspace - -- - `file_path` - - String - - The file path to use within the workspace for both deployments and workflow runs - -- - `google_service_account` - - String - - The Google service account name - -- - `host` - - String - - The Databricks workspace host URL - -- - `profile` - - String - - The Databricks workspace profile name - -- - `resource_path` - - String - - The workspace resource path - -- - `root_path` - - String - - The Databricks workspace root path - -- - `state_path` - - String - - The workspace state path - -::: - - -## variables - -**`Type: Map`** - -Defines a custom variable for the bundle. See [\_](/dev-tools/bundles/settings.md#variables). - -```yaml -variables: - : - : -``` - - -:::list-table - -- - Key - - Type - - Description - -- - `default` - - Any - - The default value for the variable. - -- - `description` - - String - - The description of the variable - -- - `lookup` - - Map - - The name of the `alert`, `cluster_policy`, `cluster`, `dashboard`, `instance_pool`, `job`, `metastore`, `pipeline`, `query`, `service_principal`, or `warehouse` object for which to retrieve an ID. See [\_](#variablesnamelookup). - -- - `type` - - String - - The type of the variable. 
- -::: - - -### variables._name_.lookup - -**`Type: Map`** - -The name of the `alert`, `cluster_policy`, `cluster`, `dashboard`, `instance_pool`, `job`, `metastore`, `pipeline`, `query`, `service_principal`, or `warehouse` object for which to retrieve an ID. - - - -:::list-table - -- - Key - - Type - - Description - -- - `alert` - - String - - The name of the alert for which to retrieve an ID. - -- - `cluster` - - String - - The name of the cluster for which to retrieve an ID. - -- - `cluster_policy` - - String - - The name of the cluster_policy for which to retrieve an ID. - -- - `dashboard` - - String - - The name of the dashboard for which to retrieve an ID. - -- - `instance_pool` - - String - - The name of the instance_pool for which to retrieve an ID. - -- - `job` - - String - - The name of the job for which to retrieve an ID. - -- - `metastore` - - String - - The name of the metastore for which to retrieve an ID. - -- - `notification_destination` - - String - - The name of the notification_destination for which to retrieve an ID. - -- - `pipeline` - - String - - The name of the pipeline for which to retrieve an ID. - -- - `query` - - String - - The name of the query for which to retrieve an ID. - -- - `service_principal` - - String - - The name of the service_principal for which to retrieve an ID. - -- - `warehouse` - - String - - The name of the warehouse for which to retrieve an ID. - -::: - - -## workspace - -**`Type: Map`** - -Defines the Databricks workspace for the bundle. See [\_](/dev-tools/bundles/settings.md#workspace). - - - -:::list-table - -- - Key - - Type - - Description - -- - `artifact_path` - - String - - The artifact path to use within the workspace for both deployments and workflow runs - -- - `auth_type` - - String - - The authentication type. 
- -- - `azure_client_id` - - String - - The Azure client ID - -- - `azure_environment` - - String - - The Azure environment - -- - `azure_login_app_id` - - String - - The Azure login app ID - -- - `azure_tenant_id` - - String - - The Azure tenant ID - -- - `azure_use_msi` - - Boolean - - Whether to use MSI for Azure - -- - `azure_workspace_resource_id` - - String - - The Azure workspace resource ID - -- - `client_id` - - String - - The client ID for the workspace - -- - `file_path` - - String - - The file path to use within the workspace for both deployments and workflow runs - -- - `google_service_account` - - String - - The Google service account name - -- - `host` - - String - - The Databricks workspace host URL - -- - `profile` - - String - - The Databricks workspace profile name - -- - `resource_path` - - String - - The workspace resource path - -- - `root_path` - - String - - The Databricks workspace root path - -- - `state_path` - - String - - The workspace state path - -::: - \ No newline at end of file diff --git a/bundle/docsgen/output/resources.md b/bundle/docsgen/output/resources.md deleted file mode 100644 index 95dd6f9494..0000000000 --- a/bundle/docsgen/output/resources.md +++ /dev/null @@ -1,9308 +0,0 @@ ---- -description: 'Learn about resources supported by Databricks Asset Bundles and how to configure them.' -last_update: - date: 2025-09-13 ---- - - - -# :re[DABS] resources - -:re[DABS] allows you to specify information about the :re[Databricks] resources used by the bundle in the `resources` mapping in the bundle configuration. See [resources mapping](/dev-tools/bundles/settings.md#resources) and [resources key reference](/dev-tools/bundles/reference.md#resources). - -This article outlines supported resource types for bundles and provides details and an example for each supported type. For additional examples, see [\_](/dev-tools/bundles/resource-examples.md). 
- -:::tip - -To generate YAML for any existing resource, use the `databricks bundle generate` command. See [\_](/dev-tools/cli/bundle-commands.md#generate). - -::: - -## Supported resources - -The following table lists supported resource types for bundles. Some resources can be created by defining them in a bundle and deploying the bundle, and some resources only support referencing an existing resource to include in the bundle. - -Resources are defined using the corresponding [Databricks REST API](/api/workspace/introduction) object’s create operation request payload, where the object’s supported fields, expressed as YAML, are the resource’s supported properties. Links to documentation for each resource’s corresponding payloads are listed in the table. - -:::tip - -The `databricks bundle validate` command returns warnings if unknown resource properties are found in bundle configuration files. - -::: - -::::aws-azure - -:::list-table - -- - Resource - - Create support - - Corresponding REST API object -- - [app](#apps) - - ✓ - - [App object](/api/workspace/apps/create) -- - [cluster](#clusters) - - ✓ - - [Cluster object](/api/workspace/clusters/create) -- - [dashboard](#dashboards) - - - - [Dashboard object](/api/workspace/lakeview/create) -- - [experiment](#experiments) - - ✓ - - [Experiment object](/api/workspace/experiments/createexperiment) -- - [job](#jobs) - - ✓ - - [Job object](/api/workspace/jobs/create) -- - [model (legacy)](#models) - - ✓ - - [Model (legacy) object](/api/workspace/modelregistry/createmodel) -- - [model_serving_endpoint](#model_serving_endpoints) - - ✓ - - [Model serving endpoint object](/api/workspace/servingendpoints/create) -- - [pipeline](#pipelines) - - ✓ - - [Pipeline object](/api/workspace/pipelines/create) -- - [quality_monitor](#quality_monitors) - - ✓ - - [Quality monitor object](/api/workspace/qualitymonitors/create) -- - [registered_model](#registered_models) (:re[UC]) - - ✓ - - [Registered model
object](/api/workspace/registeredmodels/create) -- - [schema](#schemas) (:re[UC]) - - ✓ - - [Schema object](/api/workspace/schemas/create) -- - [volume](#volumes) (:re[UC]) - - ✓ - - [Volume object](/api/workspace/volumes/create) - -::: - -:::: - -::::gcp - -:::list-table - -- - Resource - - Create support - - Corresponding REST API object -- - [cluster](#clusters) - - ✓ - - [Cluster object](/api/workspace/clusters/create) -- - [dashboard](#dashboards) - - - - [Dashboard object](/api/workspace/lakeview/create) -- - [experiment](#experiments) - - ✓ - - [Experiment object](/api/workspace/experiments/createexperiment) -- - [job](#jobs) - - ✓ - - [Job object](/api/workspace/jobs/create) -- - [model (legacy)](#models) - - ✓ - - [Model (legacy) object](/api/workspace/modelregistry/createmodel) -- - [model_serving_endpoint](#model_serving_endpoints) - - ✓ - - [Model serving endpoint object](/api/workspace/servingendpoints/create) -- - [pipeline](#pipelines) - - ✓ - - [Pipeline object](/api/workspace/pipelines/create) -- - [quality_monitor](#quality_monitors) - - ✓ - - [Quality monitor object](/api/workspace/qualitymonitors/create) -- - [registered_model](#registered_models) (:re[UC]) - - ✓ - - [Registered model object](/api/workspace/registeredmodels/create) -- - [schema](#schemas) (:re[UC]) - - ✓ - - [Schema object](/api/workspace/schemas/create) -- - [volume](#volumes) (:re[UC]) - - ✓ - - [Volume object](/api/workspace/volumes/create) - -::: - -:::: - - -## apps - -**`Type: Map`** - -The app resource defines a [Databricks app](/api/workspace/apps/create). For information about Databricks Apps, see [\_](/dev-tools/databricks-apps/index.md). - -```yaml -apps: - : - : -``` - - -:::list-table - -- - Key - - Type - - Description - -- - `budget_policy_id` - - String - - - -- - `config` - - Map - - - -- - `description` - - String - - The description of the app. - -- - `lifecycle` - - Map - - Lifecycle is a struct that contains the lifecycle settings for a resource.
It controls the behavior of the resource when it is deployed or destroyed. See [\_](#appsnamelifecycle). - -- - `name` - - String - - The name of the app. The name must contain only lowercase alphanumeric characters and hyphens. It must be unique within the workspace. - -- - `permissions` - - Sequence - - See [\_](#appsnamepermissions). - -- - `resources` - - Sequence - - Resources for the app. See [\_](#appsnameresources). - -- - `source_code_path` - - String - - - -- - `user_api_scopes` - - Sequence - - - -::: - - -### apps._name_.lifecycle - -**`Type: Map`** - -Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. - - - -:::list-table - -- - Key - - Type - - Description - -- - `prevent_destroy` - - Boolean - - Lifecycle setting to prevent the resource from being destroyed. - -::: - - -### apps._name_.permissions - -**`Type: Sequence`** - - - - - -:::list-table - -- - Key - - Type - - Description - -- - `group_name` - - String - - - -- - `level` - - String - - - -- - `service_principal_name` - - String - - - -- - `user_name` - - String - - - -::: - - -### apps._name_.resources - -**`Type: Sequence`** - -Resources for the app. - - - -:::list-table - -- - Key - - Type - - Description - -- - `database` - - Map - - See [\_](#appsnameresourcesdatabase). - -- - `description` - - String - - Description of the App Resource. - -- - `job` - - Map - - See [\_](#appsnameresourcesjob). - -- - `name` - - String - - Name of the App Resource. - -- - `secret` - - Map - - See [\_](#appsnameresourcessecret). - -- - `serving_endpoint` - - Map - - See [\_](#appsnameresourcesserving_endpoint). - -- - `sql_warehouse` - - Map - - See [\_](#appsnameresourcessql_warehouse). - -- - `uc_securable` - - Map - - See [\_](#appsnameresourcesuc_securable). 
- -::: - - -### apps._name_.resources.database - -**`Type: Map`** - - - - - -:::list-table - -- - Key - - Type - - Description - -- - `database_name` - - String - - - -- - `instance_name` - - String - - - -- - `permission` - - String - - - -::: - - -### apps._name_.resources.job - -**`Type: Map`** - - - - - -:::list-table - -- - Key - - Type - - Description - -- - `id` - - String - - - -- - `permission` - - String - - - -::: - - -### apps._name_.resources.secret - -**`Type: Map`** - - - - - -:::list-table - -- - Key - - Type - - Description - -- - `key` - - String - - - -- - `permission` - - String - - Permission to grant on the secret scope. Supported permissions are: "READ", "WRITE", "MANAGE". - -- - `scope` - - String - - - -::: - - -### apps._name_.resources.serving_endpoint - -**`Type: Map`** - - - - - -:::list-table - -- - Key - - Type - - Description - -- - `name` - - String - - - -- - `permission` - - String - - - -::: - - -### apps._name_.resources.sql_warehouse - -**`Type: Map`** - - - - - -:::list-table - -- - Key - - Type - - Description - -- - `id` - - String - - - -- - `permission` - - String - - - -::: - - -### apps._name_.resources.uc_securable - -**`Type: Map`** - - - - - -:::list-table - -- - Key - - Type - - Description - -- - `permission` - - String - - - -- - `securable_full_name` - - String - - - -- - `securable_type` - - String - - - -::: - - -## clusters - -**`Type: Map`** - -The cluster resource defines an [all-purpose cluster](/api/workspace/clusters/create). - -```yaml -clusters: - : - : -``` - - -:::list-table - -- - Key - - Type - - Description - -- - `apply_policy_default_values` - - Boolean - - When set to true, fixed and default values from the policy will be used for fields that are omitted. When set to false, only fixed values from the policy will be applied. - -- - `autoscale` - - Map - - Parameters needed in order to automatically scale clusters up and down based on load. 
Note: autoscaling works best with DB runtime versions 3.0 or later. See [\_](#clustersnameautoscale). - -- - `autotermination_minutes` - - Integer - - Automatically terminates the cluster after it is inactive for this time in minutes. If not set, this cluster will not be automatically terminated. If specified, the threshold must be between 10 and 10000 minutes. Users can also set this value to 0 to explicitly disable automatic termination. - -- - `aws_attributes` - - Map - - Attributes related to clusters running on Amazon Web Services. If not specified at cluster creation, a set of default values will be used. See [\_](#clustersnameaws_attributes). - -- - `azure_attributes` - - Map - - Attributes related to clusters running on Microsoft Azure. If not specified at cluster creation, a set of default values will be used. See [\_](#clustersnameazure_attributes). - -- - `cluster_log_conf` - - Map - - The configuration for delivering spark logs to a long-term storage destination. Three kinds of destinations (DBFS, S3 and Unity Catalog volumes) are supported. Only one destination can be specified for one cluster. If the conf is given, the logs will be delivered to the destination every `5 mins`. The destination of driver logs is `$destination/$clusterId/driver`, while the destination of executor logs is `$destination/$clusterId/executor`. See [\_](#clustersnamecluster_log_conf). - -- - `cluster_name` - - String - - Cluster name requested by the user. This doesn't have to be unique. If not specified at creation, the cluster name will be an empty string. For job clusters, the cluster name is automatically set based on the job and job run IDs. - -- - `custom_tags` - - Map - - Additional tags for cluster resources. Databricks will tag all cluster resources (e.g., AWS instances and EBS volumes) with these tags in addition to `default_tags`. 
Notes: - Currently, Databricks allows at most 45 custom tags - Clusters can only reuse cloud resources if the resources' tags are a subset of the cluster tags - -- - `data_security_mode` - - String - - Data security mode decides what data governance model to use when accessing data from a cluster. The following modes can only be used when `kind = CLASSIC_PREVIEW`. * `DATA_SECURITY_MODE_AUTO`: Databricks will choose the most appropriate access mode depending on your compute configuration. * `DATA_SECURITY_MODE_STANDARD`: Alias for `USER_ISOLATION`. * `DATA_SECURITY_MODE_DEDICATED`: Alias for `SINGLE_USER`. The following modes can be used regardless of `kind`. * `NONE`: No security isolation for multiple users sharing the cluster. Data governance features are not available in this mode. * `SINGLE_USER`: A secure cluster that can only be exclusively used by a single user specified in `single_user_name`. Most programming languages, cluster features and data governance features are available in this mode. * `USER_ISOLATION`: A secure cluster that can be shared by multiple users. Cluster users are fully isolated so that they cannot see each other's data and credentials. Most data governance features are supported in this mode. But programming languages and cluster features might be limited. The following modes are deprecated starting with Databricks Runtime 15.0 and will be removed for future Databricks Runtime versions: * `LEGACY_TABLE_ACL`: This mode is for users migrating from legacy Table ACL clusters. * `LEGACY_PASSTHROUGH`: This mode is for users migrating from legacy Passthrough on high concurrency clusters. * `LEGACY_SINGLE_USER`: This mode is for users migrating from legacy Passthrough on standard clusters. * `LEGACY_SINGLE_USER_STANDARD`: This mode provides a way that doesn’t have UC nor passthrough enabled. - -- - `docker_image` - - Map - - See [\_](#clustersnamedocker_image). 
- -- - `driver_instance_pool_id` - - String - - The optional ID of the instance pool to which the driver of the cluster belongs. The pool cluster uses the instance pool with id (instance_pool_id) if the driver pool is not assigned. - -- - `driver_node_type_id` - - String - - The node type of the Spark driver. Note that this field is optional; if unset, the driver node type will be set as the same value as `node_type_id` defined above. This field, along with node_type_id, should not be set if virtual_cluster_size is set. If driver_node_type_id, node_type_id, and virtual_cluster_size are all specified, driver_node_type_id and node_type_id take precedence. - -- - `enable_elastic_disk` - - Boolean - - Autoscaling Local Storage: when enabled, this cluster will dynamically acquire additional disk space when its Spark workers are running low on disk space. This feature requires specific AWS permissions to function correctly - refer to the User Guide for more details. - -- - `enable_local_disk_encryption` - - Boolean - - Whether to enable LUKS on cluster VMs' local disks - -- - `gcp_attributes` - - Map - - Attributes related to clusters running on Google Cloud Platform. If not specified at cluster creation, a set of default values will be used. See [\_](#clustersnamegcp_attributes). - -- - `init_scripts` - - Sequence - - The configuration for storing init scripts. Any number of destinations can be specified. The scripts are executed sequentially in the order provided. If `cluster_log_conf` is specified, init script logs are sent to `<destination>/<cluster-ID>/init_scripts`. See [\_](#clustersnameinit_scripts). - -- - `instance_pool_id` - - String - - The optional ID of the instance pool to which the cluster belongs. - -- - `is_single_node` - - Boolean - - This field can only be used when `kind = CLASSIC_PREVIEW`.
When set to true, Databricks will automatically set single node related `custom_tags`, `spark_conf`, and `num_workers` - -- - `kind` - - String - - - -- - `lifecycle` - - Map - - Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. See [\_](#clustersnamelifecycle). - -- - `node_type_id` - - String - - This field encodes, through a single value, the resources available to each of the Spark nodes in this cluster. For example, the Spark nodes can be provisioned and optimized for memory or compute intensive workloads. A list of available node types can be retrieved by using the :method:clusters/listNodeTypes API call. - -- - `num_workers` - - Integer - - Number of worker nodes that this cluster should have. A cluster has one Spark Driver and `num_workers` Executors for a total of `num_workers` + 1 Spark nodes. Note: When reading the properties of a cluster, this field reflects the desired number of workers rather than the actual current number of workers. For instance, if a cluster is resized from 5 to 10 workers, this field will immediately be updated to reflect the target size of 10 workers, whereas the workers listed in `spark_info` will gradually increase from 5 to 10 as the new nodes are provisioned. - -- - `permissions` - - Sequence - - See [\_](#clustersnamepermissions). - -- - `policy_id` - - String - - The ID of the cluster policy used to create the cluster if applicable. - -- - `remote_disk_throughput` - - Integer - - If set, what the configurable throughput (in Mb/s) for the remote disk is. Currently only supported for GCP HYPERDISK_BALANCED disks. - -- - `runtime_engine` - - String - - - -- - `single_user_name` - - String - - Single user name if data_security_mode is `SINGLE_USER` - -- - `spark_conf` - - Map - - An object containing a set of optional, user-specified Spark configuration key-value pairs. 
Users can also pass in a string of extra JVM options to the driver and the executors via `spark.driver.extraJavaOptions` and `spark.executor.extraJavaOptions` respectively. - -- - `spark_env_vars` - - Map - - An object containing a set of optional, user-specified environment variable key-value pairs. Please note that key-value pair of the form (X,Y) will be exported as is (i.e., `export X='Y'`) while launching the driver and workers. In order to specify an additional set of `SPARK_DAEMON_JAVA_OPTS`, we recommend appending them to `$SPARK_DAEMON_JAVA_OPTS` as shown in the example below. This ensures that all default databricks managed environmental variables are included as well. Example Spark environment variables: `{"SPARK_WORKER_MEMORY": "28000m", "SPARK_LOCAL_DIRS": "/local_disk0"}` or `{"SPARK_DAEMON_JAVA_OPTS": "$SPARK_DAEMON_JAVA_OPTS -Dspark.shuffle.service.enabled=true"}` - -- - `spark_version` - - String - - The Spark version of the cluster, e.g. `3.3.x-scala2.11`. A list of available Spark versions can be retrieved by using the :method:clusters/sparkVersions API call. - -- - `ssh_public_keys` - - Sequence - - SSH public key contents that will be added to each Spark node in this cluster. The corresponding private keys can be used to login with the user name `ubuntu` on port `2200`. Up to 10 keys can be specified. - -- - `total_initial_remote_disk_size` - - Integer - - If set, what the total initial volume size (in GB) of the remote disks should be. Currently only supported for GCP HYPERDISK_BALANCED disks. - -- - `use_ml_runtime` - - Boolean - - This field can only be used when `kind = CLASSIC_PREVIEW`. `effective_spark_version` is determined by `spark_version` (DBR release), this field `use_ml_runtime`, and whether `node_type_id` is gpu node or not. - -- - `workload_type` - - Map - - Cluster Attributes showing for clusters workload types. See [\_](#clustersnameworkload_type). 
- -::: - - -**Example** - -The following example creates a cluster named `my_cluster` and sets that as the cluster to use to run the notebook in `my_job`: - -```yaml -bundle: - name: clusters - -resources: - clusters: - my_cluster: - num_workers: 2 - node_type_id: "i3.xlarge" - autoscale: - min_workers: 2 - max_workers: 7 - spark_version: "13.3.x-scala2.12" - spark_conf: - "spark.executor.memory": "2g" - - jobs: - my_job: - tasks: - - task_key: test_task - notebook_task: - notebook_path: "./src/my_notebook.py" -``` - -### clusters._name_.autoscale - -**`Type: Map`** - -Parameters needed in order to automatically scale clusters up and down based on load. -Note: autoscaling works best with DB runtime versions 3.0 or later. - - - -:::list-table - -- - Key - - Type - - Description - -- - `max_workers` - - Integer - - The maximum number of workers to which the cluster can scale up when overloaded. Note that `max_workers` must be strictly greater than `min_workers`. - -- - `min_workers` - - Integer - - The minimum number of workers to which the cluster can scale down when underutilized. It is also the initial number of workers the cluster will have after creation. - -::: - - -### clusters._name_.aws_attributes - -**`Type: Map`** - -Attributes related to clusters running on Amazon Web Services. -If not specified at cluster creation, a set of default values will be used. - - - -:::list-table - -- - Key - - Type - - Description - -- - `availability` - - String - - Availability type used for all subsequent nodes past the `first_on_demand` ones. Note: If `first_on_demand` is zero, this availability type will be used for the entire cluster. - -- - `ebs_volume_count` - - Integer - - The number of volumes launched for each instance. Users can choose up to 10 volumes. This feature is only enabled for supported node types. Legacy node types cannot specify custom EBS volumes. 
For node types with no instance store, at least one EBS volume needs to be specified; otherwise, cluster creation will fail. These EBS volumes will be mounted at `/ebs0`, `/ebs1`, and so on. Instance store volumes will be mounted at `/local_disk0`, `/local_disk1`, and so on. If EBS volumes are attached, Databricks will configure Spark to use only the EBS volumes for scratch storage because heterogeneously sized scratch devices can lead to inefficient disk utilization. If no EBS volumes are attached, Databricks will configure Spark to use instance store volumes. Please note that if EBS volumes are specified, then the Spark configuration `spark.local.dir` will be overridden. - -- - `ebs_volume_iops` - - Integer - - If using gp3 volumes, what IOPS to use for the disk. If this is not set, the maximum performance of a gp2 volume with the same volume size will be used. - -- - `ebs_volume_size` - - Integer - - The size of each EBS volume (in GiB) launched for each instance. For general purpose SSD, this value must be within the range 100 - 4096. For throughput optimized HDD, this value must be within the range 500 - 4096. - -- - `ebs_volume_throughput` - - Integer - - If using gp3 volumes, what throughput to use for the disk. If this is not set, the maximum performance of a gp2 volume with the same volume size will be used. - -- - `ebs_volume_type` - - String - - All EBS volume types that Databricks supports. See https://aws.amazon.com/ebs/details/ for details. - -- - `first_on_demand` - - Integer - - The first `first_on_demand` nodes of the cluster will be placed on on-demand instances. If this value is greater than 0, the cluster driver node in particular will be placed on an on-demand instance. If this value is greater than or equal to the current cluster size, all nodes will be placed on on-demand instances.
If this value is less than the current cluster size, `first_on_demand` nodes will be placed on on-demand instances and the remainder will be placed on `availability` instances. Note that this value does not affect cluster size and cannot currently be mutated over the lifetime of a cluster. - -- - `instance_profile_arn` - - String - - Nodes for this cluster will only be placed on AWS instances with this instance profile. If omitted, nodes will be placed on instances without an IAM instance profile. The instance profile must have previously been added to the Databricks environment by an account administrator. This feature may only be available to certain customer plans. - -- - `spot_bid_price_percent` - - Integer - - The bid price for AWS spot instances, as a percentage of the corresponding instance type's on-demand price. For example, if this field is set to 50, and the cluster needs a new `r3.xlarge` spot instance, then the bid price is half of the price of on-demand `r3.xlarge` instances. Similarly, if this field is set to 200, the bid price is twice the price of on-demand `r3.xlarge` instances. If not specified, the default value is 100. When spot instances are requested for this cluster, only spot instances whose bid price percentage matches this field will be considered. Note that, for safety, we enforce this field to be no more than 10000. - -- - `zone_id` - - String - - Identifier for the availability zone/datacenter in which the cluster resides. This string will be of a form like "us-west-2a". The provided availability zone must be in the same region as the Databricks deployment. For example, "us-west-2a" is not a valid zone id if the Databricks deployment resides in the "us-east-1" region. This is an optional field at cluster creation, and if not specified, a default zone will be used. If the zone specified is "auto", Databricks will try to place the cluster in a zone with high availability, and will retry placement in a different AZ if there is not enough capacity.
The list of available zones as well as the default value can be found by using the `List Zones` method. - -::: - - -### clusters._name_.azure_attributes - -**`Type: Map`** - -Attributes related to clusters running on Microsoft Azure. -If not specified at cluster creation, a set of default values will be used. - - - -:::list-table - -- - Key - - Type - - Description - -- - `availability` - - String - - Availability type used for all subsequent nodes past the `first_on_demand` ones. Note: If `first_on_demand` is zero, this availability type will be used for the entire cluster. - -- - `first_on_demand` - - Integer - - The first `first_on_demand` nodes of the cluster will be placed on on-demand instances. This value should be greater than 0, to make sure the cluster driver node is placed on an on-demand instance. If this value is greater than or equal to the current cluster size, all nodes will be placed on on-demand instances. If this value is less than the current cluster size, `first_on_demand` nodes will be placed on on-demand instances and the remainder will be placed on `availability` instances. Note that this value does not affect cluster size and cannot currently be mutated over the lifetime of a cluster. - -- - `log_analytics_info` - - Map - - Defines values necessary to configure and run Azure Log Analytics agent. See [\_](#clustersnameazure_attributeslog_analytics_info). - -- - `spot_bid_max_price` - - Any - - The max bid price to be used for Azure spot instances. The max price for the bid cannot be higher than the on-demand price of the instance. If not specified, the default value is -1, which specifies that the instance cannot be evicted on the basis of price, and only on the basis of availability. Further, the value must be either greater than 0 or equal to -1.
- -::: - - -### clusters._name_.azure_attributes.log_analytics_info - -**`Type: Map`** - -Defines values necessary to configure and run Azure Log Analytics agent - - - -:::list-table - -- - Key - - Type - - Description - -- - `log_analytics_primary_key` - - String - - The primary key for the Azure Log Analytics agent configuration - -- - `log_analytics_workspace_id` - - String - - The workspace ID for the Azure Log Analytics agent configuration - -::: - - -### clusters._name_.cluster_log_conf - -**`Type: Map`** - -The configuration for delivering spark logs to a long-term storage destination. -Three kinds of destinations (DBFS, S3 and Unity Catalog volumes) are supported. Only one destination can be specified -for one cluster. If the conf is given, the logs will be delivered to the destination every -`5 mins`. The destination of driver logs is `$destination/$clusterId/driver`, while -the destination of executor logs is `$destination/$clusterId/executor`. - - - -:::list-table - -- - Key - - Type - - Description - -- - `dbfs` - - Map - - destination needs to be provided. e.g. `{ "dbfs" : { "destination" : "dbfs:/home/cluster_log" } }`. See [\_](#clustersnamecluster_log_confdbfs). - -- - `s3` - - Map - - destination and either the region or endpoint need to be provided. e.g. `{ "s3": { "destination" : "s3://cluster_log_bucket/prefix", "region" : "us-west-2" } }` Cluster iam role is used to access s3, please make sure the cluster iam role in `instance_profile_arn` has permission to write data to the s3 destination. See [\_](#clustersnamecluster_log_confs3). - -- - `volumes` - - Map - - destination needs to be provided, e.g. `{ "volumes": { "destination": "/Volumes/catalog/schema/volume/cluster_log" } }`. See [\_](#clustersnamecluster_log_confvolumes). - -::: - - -### clusters._name_.cluster_log_conf.dbfs - -**`Type: Map`** - -destination needs to be provided. e.g. 
-`{ "dbfs" : { "destination" : "dbfs:/home/cluster_log" } }` - - - -:::list-table - -- - Key - - Type - - Description - -- - `destination` - - String - - dbfs destination, e.g. `dbfs:/my/path` - -::: - - -### clusters._name_.cluster_log_conf.s3 - -**`Type: Map`** - -destination and either the region or endpoint need to be provided. e.g. -`{ "s3": { "destination" : "s3://cluster_log_bucket/prefix", "region" : "us-west-2" } }` -Cluster iam role is used to access s3, please make sure the cluster iam role in -`instance_profile_arn` has permission to write data to the s3 destination. - - - -:::list-table - -- - Key - - Type - - Description - -- - `canned_acl` - - String - - (Optional) Set canned access control list for the logs, e.g. `bucket-owner-full-control`. If `canned_acl` is set, please make sure the cluster iam role has `s3:PutObjectAcl` permission on the destination bucket and prefix. The full list of possible canned acl can be found at http://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl. Please also note that by default only the object owner gets full controls. If you are using cross account role for writing data, you may want to set `bucket-owner-full-control` to make bucket owner able to read the logs. - -- - `destination` - - String - - S3 destination, e.g. `s3://my-bucket/some-prefix`. Note that logs will be delivered using cluster iam role, please make sure you set cluster iam role and the role has write access to the destination. Please also note that you cannot use AWS keys to deliver logs. - -- - `enable_encryption` - - Boolean - - (Optional) Flag to enable server side encryption, `false` by default. - -- - `encryption_type` - - String - - (Optional) The encryption type, it could be `sse-s3` or `sse-kms`. It will be used only when encryption is enabled and the default type is `sse-s3`. - -- - `endpoint` - - String - - S3 endpoint, e.g. `https://s3-us-west-2.amazonaws.com`. Either region or endpoint needs to be set.
If both are set, endpoint will be used. - -- - `kms_key` - - String - - (Optional) Kms key which will be used if encryption is enabled and encryption type is set to `sse-kms`. - -- - `region` - - String - - S3 region, e.g. `us-west-2`. Either region or endpoint needs to be set. If both are set, endpoint will be used. - -::: - - -### clusters._name_.cluster_log_conf.volumes - -**`Type: Map`** - -destination needs to be provided, e.g. -`{ "volumes": { "destination": "/Volumes/catalog/schema/volume/cluster_log" } }` - - - -:::list-table - -- - Key - - Type - - Description - -- - `destination` - - String - - UC Volumes destination, e.g. `/Volumes/catalog/schema/vol1/init-scripts/setup-datadog.sh` or `dbfs:/Volumes/catalog/schema/vol1/init-scripts/setup-datadog.sh` - -::: - - -### clusters._name_.docker_image - -**`Type: Map`** - - - - - -:::list-table - -- - Key - - Type - - Description - -- - `basic_auth` - - Map - - See [\_](#clustersnamedocker_imagebasic_auth). - -- - `url` - - String - - URL of the docker image. - -::: - - -### clusters._name_.docker_image.basic_auth - -**`Type: Map`** - - - - - -:::list-table - -- - Key - - Type - - Description - -- - `password` - - String - - Password of the user - -- - `username` - - String - - Name of the user - -::: - - -### clusters._name_.gcp_attributes - -**`Type: Map`** - -Attributes related to clusters running on Google Cloud Platform. -If not specified at cluster creation, a set of default values will be used. - - - -:::list-table - -- - Key - - Type - - Description - -- - `availability` - - String - - This field determines whether the instance pool will contain preemptible VMs, on-demand VMs, or preemptible VMs with a fallback to on-demand VMs if the former is unavailable. - -- - `boot_disk_size` - - Integer - - Boot disk size in GB - -- - `first_on_demand` - - Integer - - The first `first_on_demand` nodes of the cluster will be placed on on-demand instances. 
This value should be greater than 0, to make sure the cluster driver node is placed on an on-demand instance. If this value is greater than or equal to the current cluster size, all nodes will be placed on on-demand instances. If this value is less than the current cluster size, `first_on_demand` nodes will be placed on on-demand instances and the remainder will be placed on `availability` instances. Note that this value does not affect cluster size and cannot currently be mutated over the lifetime of a cluster. - -- - `google_service_account` - - String - - If provided, the cluster will impersonate the google service account when accessing gcloud services (like GCS). The google service account must have previously been added to the Databricks environment by an account administrator. - -- - `local_ssd_count` - - Integer - - If provided, each node (workers and driver) in the cluster will have this number of local SSDs attached. Each local SSD is 375GB in size. Refer to [GCP documentation](https://cloud.google.com/compute/docs/disks/local-ssd#choose_number_local_ssds) for the supported number of local SSDs for each instance type. - -- - `use_preemptible_executors` - - Boolean - - This field is deprecated - -- - `zone_id` - - String - - Identifier for the availability zone in which the cluster resides. This can be one of the following: - "HA" => High availability, spread nodes across availability zones for a Databricks deployment region [default]. - "AUTO" => Databricks picks an availability zone to schedule the cluster on. - A GCP availability zone => Pick One of the available zones for (machine type + region) from https://cloud.google.com/compute/docs/regions-zones. - -::: - - -### clusters._name_.init_scripts - -**`Type: Sequence`** - -The configuration for storing init scripts. Any number of destinations can be specified. -The scripts are executed sequentially in the order provided. 
-If `cluster_log_conf` is specified, init script logs are sent to `<destination>/<cluster-ID>/init_scripts`. - - - -:::list-table - -- - Key - - Type - - Description - -- - `abfss` - - Map - - Contains the Azure Data Lake Storage destination path. See [\_](#clustersnameinit_scriptsabfss). - -- - `dbfs` - - Map - - This field is deprecated - -- - `file` - - Map - - destination needs to be provided, e.g. `{ "file": { "destination": "file:/my/local/file.sh" } }`. See [\_](#clustersnameinit_scriptsfile). - -- - `gcs` - - Map - - destination needs to be provided, e.g. `{ "gcs": { "destination": "gs://my-bucket/file.sh" } }`. See [\_](#clustersnameinit_scriptsgcs). - -- - `s3` - - Map - - destination and either the region or endpoint need to be provided. e.g. `{ "s3": { "destination": "s3://cluster_log_bucket/prefix", "region": "us-west-2" } }` Cluster iam role is used to access s3, please make sure the cluster iam role in `instance_profile_arn` has permission to write data to the s3 destination. See [\_](#clustersnameinit_scriptss3). - -- - `volumes` - - Map - - destination needs to be provided. e.g. `{ "volumes" : { "destination" : "/Volumes/my-init.sh" } }`. See [\_](#clustersnameinit_scriptsvolumes). - -- - `workspace` - - Map - - destination needs to be provided, e.g. `{ "workspace": { "destination": "/cluster-init-scripts/setup-datadog.sh" } }`. See [\_](#clustersnameinit_scriptsworkspace). - -::: - - -### clusters._name_.init_scripts.abfss - -**`Type: Map`** - -Contains the Azure Data Lake Storage destination path - - - -:::list-table - -- - Key - - Type - - Description - -- - `destination` - - String - - abfss destination, e.g. `abfss://<container-name>@<storage-account-name>.dfs.core.windows.net/<directory-name>`. - -::: - - -### clusters._name_.init_scripts.file - -**`Type: Map`** - -destination needs to be provided, e.g. -`{ "file": { "destination": "file:/my/local/file.sh" } }` - - - -:::list-table - -- - Key - - Type - - Description - -- - `destination` - - String - - local file destination, e.g.
`file:/my/local/file.sh` - -::: - - -### clusters._name_.init_scripts.gcs - -**`Type: Map`** - -destination needs to be provided, e.g. -`{ "gcs": { "destination": "gs://my-bucket/file.sh" } }` - - - -:::list-table - -- - Key - - Type - - Description - -- - `destination` - - String - - GCS destination/URI, e.g. `gs://my-bucket/some-prefix` - -::: - - -### clusters._name_.init_scripts.s3 - -**`Type: Map`** - -destination and either the region or endpoint need to be provided. e.g. -`{ "s3": { "destination": "s3://cluster_log_bucket/prefix", "region": "us-west-2" } }` -Cluster iam role is used to access s3, please make sure the cluster iam role in -`instance_profile_arn` has permission to write data to the s3 destination. - - - -:::list-table - -- - Key - - Type - - Description - -- - `canned_acl` - - String - - (Optional) Set canned access control list for the logs, e.g. `bucket-owner-full-control`. If `canned_acl` is set, please make sure the cluster iam role has `s3:PutObjectAcl` permission on the destination bucket and prefix. The full list of possible canned acl can be found at http://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl. Please also note that by default only the object owner gets full controls. If you are using cross account role for writing data, you may want to set `bucket-owner-full-control` to make bucket owner able to read the logs. - -- - `destination` - - String - - S3 destination, e.g. `s3://my-bucket/some-prefix`. Note that logs will be delivered using cluster iam role, please make sure you set cluster iam role and the role has write access to the destination. Please also note that you cannot use AWS keys to deliver logs. - -- - `enable_encryption` - - Boolean - - (Optional) Flag to enable server side encryption, `false` by default. - -- - `encryption_type` - - String - - (Optional) The encryption type, it could be `sse-s3` or `sse-kms`.
It will be used only when encryption is enabled and the default type is `sse-s3`. - -- - `endpoint` - - String - - S3 endpoint, e.g. `https://s3-us-west-2.amazonaws.com`. Either region or endpoint needs to be set. If both are set, endpoint will be used. - -- - `kms_key` - - String - - (Optional) Kms key which will be used if encryption is enabled and encryption type is set to `sse-kms`. - -- - `region` - - String - - S3 region, e.g. `us-west-2`. Either region or endpoint needs to be set. If both are set, endpoint will be used. - -::: - - -### clusters._name_.init_scripts.volumes - -**`Type: Map`** - -destination needs to be provided. e.g. -`{ \"volumes\" : { \"destination\" : \"/Volumes/my-init.sh\" } }` - - - -:::list-table - -- - Key - - Type - - Description - -- - `destination` - - String - - UC Volumes destination, e.g. `/Volumes/catalog/schema/vol1/init-scripts/setup-datadog.sh` or `dbfs:/Volumes/catalog/schema/vol1/init-scripts/setup-datadog.sh` - -::: - - -### clusters._name_.init_scripts.workspace - -**`Type: Map`** - -destination needs to be provided, e.g. -`{ "workspace": { "destination": "/cluster-init-scripts/setup-datadog.sh" } }` - - - -:::list-table - -- - Key - - Type - - Description - -- - `destination` - - String - - wsfs destination, e.g. `workspace:/cluster-init-scripts/setup-datadog.sh` - -::: - - -### clusters._name_.lifecycle - -**`Type: Map`** - -Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. - - - -:::list-table - -- - Key - - Type - - Description - -- - `prevent_destroy` - - Boolean - - Lifecycle setting to prevent the resource from being destroyed. 
- -::: - - -### clusters._name_.permissions - -**`Type: Sequence`** - - - - - -:::list-table - -- - Key - - Type - - Description - -- - `group_name` - - String - - - -- - `level` - - String - - - -- - `service_principal_name` - - String - - - -- - `user_name` - - String - - - -::: - - -### clusters._name_.workload_type - -**`Type: Map`** - -Cluster attributes describing the workload types the cluster supports. - - - -:::list-table - -- - Key - - Type - - Description - -- - `clients` - - Map - - Defines what type of clients can use the cluster, e.g. Notebooks, Jobs. See [\_](#clustersnameworkload_typeclients). - -::: - - -### clusters._name_.workload_type.clients - -**`Type: Map`** - -Defines what type of clients can use the cluster, e.g. Notebooks, Jobs. - - - -:::list-table - -- - Key - - Type - - Description - -- - `jobs` - - Boolean - - With jobs set, the cluster can be used for jobs - -- - `notebooks` - - Boolean - - With notebooks set, this cluster can be used for notebooks - -::: - - -## dashboards - -**`Type: Map`** - -The dashboard resource allows you to manage [AI/BI dashboards](/api/workspace/lakeview/create) in a bundle. For information about AI/BI dashboards, see [_](/dashboards/index.md). - -```yaml -dashboards: - <dashboard-name>: - <dashboard-field-name>: <dashboard-field-value> -``` - - -:::list-table - -- - Key - - Type - - Description - -- - `create_time` - - String - - The timestamp of when the dashboard was created. - -- - `dashboard_id` - - String - - UUID identifying the dashboard. - -- - `display_name` - - String - - The display name of the dashboard. - -- - `embed_credentials` - - Boolean - - - -- - `etag` - - String - - The etag for the dashboard. Can be optionally provided on updates to ensure that the dashboard has not been modified since the last read. This field is excluded in List Dashboards responses. - -- - `file_path` - - String - - - -- - `lifecycle` - - Map - - Lifecycle is a struct that contains the lifecycle settings for a resource.
It controls the behavior of the resource when it is deployed or destroyed. See [\_](#dashboardsnamelifecycle). - -- - `lifecycle_state` - - String - - The state of the dashboard resource. Used for tracking trashed status. - -- - `parent_path` - - String - - The workspace path of the folder containing the dashboard. Includes leading slash and no trailing slash. This field is excluded in List Dashboards responses. - -- - `path` - - String - - The workspace path of the dashboard asset, including the file name. Exported dashboards always have the file extension `.lvdash.json`. This field is excluded in List Dashboards responses. - -- - `permissions` - - Sequence - - See [\_](#dashboardsnamepermissions). - -- - `serialized_dashboard` - - Any - - The contents of the dashboard in serialized string form. This field is excluded in List Dashboards responses. Use the [get dashboard API](https://docs.databricks.com/api/workspace/lakeview/get) to retrieve an example response, which includes the `serialized_dashboard` field. This field provides the structure of the JSON string that represents the dashboard's layout and components. - -- - `update_time` - - String - - The timestamp of when the dashboard was last updated by the user. This field is excluded in List Dashboards responses. - -- - `warehouse_id` - - String - - The warehouse ID used to run the dashboard. - -::: - - -**Example** - -The following example includes and deploys the sample __NYC Taxi Trip Analysis__ dashboard to the Databricks workspace. - -``` yaml -resources: - dashboards: - nyc_taxi_trip_analysis: - display_name: "NYC Taxi Trip Analysis" - file_path: ../src/nyc_taxi_trip_analysis.lvdash.json - warehouse_id: ${var.warehouse_id} -``` -If you use the UI to modify the dashboard, modifications made through the UI are not applied to the dashboard JSON file in the local bundle unless you explicitly update it using `bundle generate`. 
You can use the `--watch` option to continuously poll and retrieve changes to the dashboard. See [_](/dev-tools/cli/bundle-commands.md#generate). - -In addition, if you attempt to deploy a bundle that contains a dashboard JSON file that is different from the one in the remote workspace, an error will occur. To force the deploy and overwrite the dashboard in the remote workspace with the local one, use the `--force` option. See [_](/dev-tools/cli/bundle-commands.md#deploy). - -### dashboards._name_.lifecycle - -**`Type: Map`** - -Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. - - - -:::list-table - -- - Key - - Type - - Description - -- - `prevent_destroy` - - Boolean - - Lifecycle setting to prevent the resource from being destroyed. - -::: - - -### dashboards._name_.permissions - -**`Type: Sequence`** - - - - - -:::list-table - -- - Key - - Type - - Description - -- - `group_name` - - String - - - -- - `level` - - String - - - -- - `service_principal_name` - - String - - - -- - `user_name` - - String - - - -::: - - -## database_catalogs - -**`Type: Map`** - - - -```yaml -database_catalogs: - : - : -``` - - -:::list-table - -- - Key - - Type - - Description - -- - `create_database_if_not_exists` - - Boolean - - - -- - `database_instance_name` - - String - - The name of the DatabaseInstance housing the database. - -- - `database_name` - - String - - The name of the database (in an instance) associated with the catalog. - -- - `lifecycle` - - Map - - Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. See [\_](#database_catalogsnamelifecycle). - -- - `name` - - String - - The name of the catalog in UC. - -::: - - -### database_catalogs._name_.lifecycle - -**`Type: Map`** - -Lifecycle is a struct that contains the lifecycle settings for a resource.
It controls the behavior of the resource when it is deployed or destroyed. - - - -:::list-table - -- - Key - - Type - - Description - -- - `prevent_destroy` - - Boolean - - Lifecycle setting to prevent the resource from being destroyed. - -::: - - -## database_instances - -**`Type: Map`** - -A DatabaseInstance represents a logical Postgres instance, comprised of both compute and storage. - -```yaml -database_instances: - : - : -``` - - -:::list-table - -- - Key - - Type - - Description - -- - `capacity` - - String - - The sku of the instance. Valid values are "CU_1", "CU_2", "CU_4", "CU_8". - -- - `enable_pg_native_login` - - Boolean - - Whether the instance has PG native password login enabled. Defaults to true. - -- - `enable_readable_secondaries` - - Boolean - - Whether to enable secondaries to serve read-only traffic. Defaults to false. - -- - `lifecycle` - - Map - - Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. See [\_](#database_instancesnamelifecycle). - -- - `name` - - String - - The name of the instance. This is the unique identifier for the instance. - -- - `node_count` - - Integer - - The number of nodes in the instance, composed of 1 primary and 0 or more secondaries. Defaults to 1 primary and 0 secondaries. - -- - `parent_instance_ref` - - Map - - The ref of the parent instance. This is only available if the instance is child instance. Input: For specifying the parent instance to create a child instance. Optional. Output: Only populated if provided as input to create a child instance. See [\_](#database_instancesnameparent_instance_ref). - -- - `permissions` - - Sequence - - See [\_](#database_instancesnamepermissions). - -- - `retention_window_in_days` - - Integer - - The retention window for the instance. This is the time window in days for which the historical data is retained. The default value is 7 days. Valid values are 2 to 35 days. 
- -- - `stopped` - - Boolean - - Whether the instance is stopped. - -::: - - -### database_instances._name_.lifecycle - -**`Type: Map`** - -Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. - - - -:::list-table - -- - Key - - Type - - Description - -- - `prevent_destroy` - - Boolean - - Lifecycle setting to prevent the resource from being destroyed. - -::: - - -### database_instances._name_.parent_instance_ref - -**`Type: Map`** - -The ref of the parent instance. This is only available if the instance is a -child instance. -Input: For specifying the parent instance to create a child instance. Optional. -Output: Only populated if provided as input to create a child instance. - - - -:::list-table - -- - Key - - Type - - Description - -- - `branch_time` - - String - - Branch time of the ref database instance. For a parent ref instance, this is the point in time on the parent instance from which the instance was created. For a child ref instance, this is the point in time on the instance from which the child instance was created. Input: For specifying the point in time to create a child instance. Optional. Output: Only populated if provided as input to create a child instance. - -- - `lsn` - - String - - User-specified WAL LSN of the ref database instance. Input: For specifying the WAL LSN to create a child instance. Optional. Output: Only populated if provided as input to create a child instance. - -- - `name` - - String - - Name of the ref database instance.
- -::: - - -### database_instances._name_.permissions - -**`Type: Sequence`** - - - - - -:::list-table - -- - Key - - Type - - Description - -- - `group_name` - - String - - - -- - `level` - - String - - - -- - `service_principal_name` - - String - - - -- - `user_name` - - String - - - -::: - - -## experiments - -**`Type: Map`** - -The experiment resource allows you to define [MLflow experiments](/api/workspace/experiments/createexperiment) in a bundle. For information about MLflow experiments, see [_](/mlflow/experiments.md). - -```yaml -experiments: - : - : -``` - - -:::list-table - -- - Key - - Type - - Description - -- - `artifact_location` - - String - - Location where artifacts for the experiment are stored. - -- - `creation_time` - - Integer - - Creation time - -- - `experiment_id` - - String - - Unique identifier for the experiment. - -- - `last_update_time` - - Integer - - Last update time - -- - `lifecycle` - - Map - - Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. See [\_](#experimentsnamelifecycle). - -- - `lifecycle_stage` - - String - - Current life cycle stage of the experiment: "active" or "deleted". Deleted experiments are not returned by APIs. - -- - `name` - - String - - Human readable name that identifies the experiment. - -- - `permissions` - - Sequence - - See [\_](#experimentsnamepermissions). - -- - `tags` - - Sequence - - Tags: Additional metadata key-value pairs. See [\_](#experimentsnametags). - -::: - - -**Example** - -The following example defines an experiment that all users can view: - -```yaml -resources: - experiments: - experiment: - name: my_ml_experiment - permissions: - - level: CAN_READ - group_name: users - description: MLflow experiment used to track runs -``` - -### experiments._name_.lifecycle - -**`Type: Map`** - -Lifecycle is a struct that contains the lifecycle settings for a resource. 
It controls the behavior of the resource when it is deployed or destroyed. - - - -:::list-table - -- - Key - - Type - - Description - -- - `prevent_destroy` - - Boolean - - Lifecycle setting to prevent the resource from being destroyed. - -::: - - -### experiments._name_.permissions - -**`Type: Sequence`** - - - - - -:::list-table - -- - Key - - Type - - Description - -- - `group_name` - - String - - - -- - `level` - - String - - - -- - `service_principal_name` - - String - - - -- - `user_name` - - String - - - -::: - - -### experiments._name_.tags - -**`Type: Sequence`** - -Tags: Additional metadata key-value pairs. - - - -:::list-table - -- - Key - - Type - - Description - -- - `key` - - String - - The tag key. - -- - `value` - - String - - The tag value. - -::: - - -## jobs - -**`Type: Map`** - -The job resource allows you to define [jobs and their corresponding tasks](/api/workspace/jobs/create) in your bundle. For information about jobs, see [_](/jobs/index.md). For a tutorial that uses a Databricks Asset Bundles template to create a job, see [_](/dev-tools/bundles/jobs-tutorial.md). - -```yaml -jobs: - : - : -``` - - -:::list-table - -- - Key - - Type - - Description - -- - `budget_policy_id` - - String - - The id of the user specified budget policy to use for this job. If not specified, a default budget policy may be applied when creating or modifying the job. See `effective_budget_policy_id` for the budget policy used by this workload. - -- - `continuous` - - Map - - An optional continuous property for this job. The continuous property will ensure that there is always one run executing. Only one of `schedule` and `continuous` can be used. See [\_](#jobsnamecontinuous). - -- - `deployment` - - Map - - Deployment information for jobs managed by external sources. See [\_](#jobsnamedeployment). - -- - `description` - - String - - An optional description for the job. The maximum length is 27700 characters in UTF-8 encoding. 
- -- - `edit_mode` - - String - - Edit mode of the job. * `UI_LOCKED`: The job is in a locked UI state and cannot be modified. * `EDITABLE`: The job is in an editable state and can be modified. - -- - `email_notifications` - - Map - - An optional set of email addresses that is notified when runs of this job begin or complete as well as when this job is deleted. See [\_](#jobsnameemail_notifications). - -- - `environments` - - Sequence - - A list of task execution environment specifications that can be referenced by serverless tasks of this job. An environment is required to be present for serverless tasks. For serverless notebook tasks, the environment is accessible in the notebook environment panel. For other serverless tasks, the task environment is required to be specified using environment_key in the task settings. See [\_](#jobsnameenvironments). - -- - `format` - - String - - This field is deprecated - -- - `git_source` - - Map - - An optional specification for a remote Git repository containing the source code used by tasks. Version-controlled source code is supported by notebook, dbt, Python script, and SQL File tasks. If `git_source` is set, these tasks retrieve the file from the remote repository by default. However, this behavior can be overridden by setting `source` to `WORKSPACE` on the task. Note: dbt and SQL File tasks support only version-controlled sources. If dbt or SQL File tasks are used, `git_source` must be defined on the job. See [\_](#jobsnamegit_source). - -- - `health` - - Map - - An optional set of health rules that can be defined for this job. See [\_](#jobsnamehealth). - -- - `job_clusters` - - Sequence - - A list of job cluster specifications that can be shared and reused by tasks of this job. Libraries cannot be declared in a shared job cluster. You must declare dependent libraries in task settings. See [\_](#jobsnamejob_clusters). 
- -- - `lifecycle` - - Map - - Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. See [\_](#jobsnamelifecycle). - -- - `max_concurrent_runs` - - Integer - - An optional maximum allowed number of concurrent runs of the job. Set this value if you want to be able to execute multiple runs of the same job concurrently. This is useful for example if you trigger your job on a frequent schedule and want to allow consecutive runs to overlap with each other, or if you want to trigger multiple runs which differ by their input parameters. This setting affects only new runs. For example, suppose the job’s concurrency is 4 and there are 4 concurrent active runs. Then setting the concurrency to 3 won’t kill any of the active runs. However, from then on, new runs are skipped unless there are fewer than 3 active runs. This value cannot exceed 1000. Setting this value to `0` causes all new runs to be skipped. - -- - `name` - - String - - An optional name for the job. The maximum length is 4096 bytes in UTF-8 encoding. - -- - `notification_settings` - - Map - - Optional notification settings that are used when sending notifications to each of the `email_notifications` and `webhook_notifications` for this job. See [\_](#jobsnamenotification_settings). - -- - `parameters` - - Sequence - - Job-level parameter definitions. See [\_](#jobsnameparameters). - -- - `performance_target` - - String - - The performance mode on a serverless job. This field determines the level of compute performance or cost-efficiency for the run. * `STANDARD`: Enables cost-efficient execution of serverless workloads. * `PERFORMANCE_OPTIMIZED`: Prioritizes fast startup and execution times through rapid scaling and optimized cluster performance. - -- - `permissions` - - Sequence - - See [\_](#jobsnamepermissions). - -- - `queue` - - Map - - The queue settings of the job. See [\_](#jobsnamequeue). 
- -- - `run_as` - - Map - - Write-only setting. Specifies the user or service principal that the job runs as. If not specified, the job runs as the user who created the job. Either `user_name` or `service_principal_name` should be specified. If not, an error is thrown. See [\_](#jobsnamerun_as). - -- - `schedule` - - Map - - An optional periodic schedule for this job. The default behavior is that the job only runs when triggered by clicking “Run Now” in the Jobs UI or sending an API request to `runNow`. See [\_](#jobsnameschedule). - -- - `tags` - - Map - - A map of tags associated with the job. These are forwarded to the cluster as cluster tags for jobs clusters, and are subject to the same limitations as cluster tags. A maximum of 25 tags can be added to the job. - -- - `tasks` - - Sequence - - A list of task specifications to be executed by this job. It supports up to 1000 elements in write endpoints (:method:jobs/create, :method:jobs/reset, :method:jobs/update, :method:jobs/submit). Read endpoints return only 100 tasks. If more than 100 tasks are available, you can paginate through them using :method:jobs/get. Use the `next_page_token` field at the object root to determine if more results are available. See [\_](#jobsnametasks). - -- - `timeout_seconds` - - Integer - - An optional timeout applied to each run of this job. A value of `0` means no timeout. - -- - `trigger` - - Map - - A configuration to trigger a run when certain conditions are met. The default behavior is that the job runs only when triggered by clicking “Run Now” in the Jobs UI or sending an API request to `runNow`. See [\_](#jobsnametrigger). - -- - `webhook_notifications` - - Map - - A collection of system notification IDs to notify when runs of this job begin or complete. See [\_](#jobsnamewebhook_notifications). 
- -::: - - -**Example** - -The following example defines a job with the resource key `hello-job` with one notebook task: - -```yaml -resources: - jobs: - hello-job: - name: hello-job - tasks: - - task_key: hello-task - notebook_task: - notebook_path: ./hello.py -``` - -For information about defining job tasks and overriding job settings, see [_](/dev-tools/bundles/job-task-types.md), [_](/dev-tools/bundles/job-task-override.md), and [_](/dev-tools/bundles/cluster-override.md). - -### jobs._name_.continuous - -**`Type: Map`** - -An optional continuous property for this job. The continuous property will ensure that there is always one run executing. Only one of `schedule` and `continuous` can be used. - - - -:::list-table - -- - Key - - Type - - Description - -- - `pause_status` - - String - - Indicate whether the continuous execution of the job is paused or not. Defaults to UNPAUSED. - -- - `task_retry_mode` - - String - - Indicate whether the continuous job is applying task level retries or not. Defaults to NEVER. - -::: - - -### jobs._name_.deployment - -**`Type: Map`** - -Deployment information for jobs managed by external sources. - - - -:::list-table - -- - Key - - Type - - Description - -- - `kind` - - String - - The kind of deployment that manages the job. * `BUNDLE`: The job is managed by Databricks Asset Bundle. - -- - `metadata_file_path` - - String - - Path of the file that contains deployment metadata. - -::: - - -### jobs._name_.email_notifications - -**`Type: Map`** - -An optional set of email addresses that is notified when runs of this job begin or complete as well as when this job is deleted. - - - -:::list-table - -- - Key - - Type - - Description - -- - `no_alert_for_skipped_runs` - - Boolean - - This field is deprecated - -- - `on_duration_warning_threshold_exceeded` - - Sequence - - A list of email addresses to be notified when the duration of a run exceeds the threshold specified for the `RUN_DURATION_SECONDS` metric in the `health` field. 
If no rule for the `RUN_DURATION_SECONDS` metric is specified in the `health` field for the job, notifications are not sent. - -- - `on_failure` - - Sequence - - A list of email addresses to be notified when a run unsuccessfully completes. A run is considered to have completed unsuccessfully if it ends with an `INTERNAL_ERROR` `life_cycle_state` or a `FAILED`, or `TIMED_OUT` result_state. If this is not specified on job creation, reset, or update the list is empty, and notifications are not sent. - -- - `on_start` - - Sequence - - A list of email addresses to be notified when a run begins. If not specified on job creation, reset, or update, the list is empty, and notifications are not sent. - -- - `on_streaming_backlog_exceeded` - - Sequence - - A list of email addresses to notify when any streaming backlog thresholds are exceeded for any stream. Streaming backlog thresholds can be set in the `health` field using the following metrics: `STREAMING_BACKLOG_BYTES`, `STREAMING_BACKLOG_RECORDS`, `STREAMING_BACKLOG_SECONDS`, or `STREAMING_BACKLOG_FILES`. Alerting is based on the 10-minute average of these metrics. If the issue persists, notifications are resent every 30 minutes. - -- - `on_success` - - Sequence - - A list of email addresses to be notified when a run successfully completes. A run is considered to have completed successfully if it ends with a `TERMINATED` `life_cycle_state` and a `SUCCESS` result_state. If not specified on job creation, reset, or update, the list is empty, and notifications are not sent. - -::: - - -### jobs._name_.environments - -**`Type: Sequence`** - -A list of task execution environment specifications that can be referenced by serverless tasks of this job. -An environment is required to be present for serverless tasks. -For serverless notebook tasks, the environment is accessible in the notebook environment panel. -For other serverless tasks, the task environment is required to be specified using environment_key in the task settings. 
- - - -:::list-table - -- - Key - - Type - - Description - -- - `environment_key` - - String - - The key of an environment. It has to be unique within a job. - -- - `spec` - - Map - - The environment entity used to preserve serverless environment side panel, jobs' environment for non-notebook task, and DLT's environment for classic and serverless pipelines. In this minimal environment spec, only pip dependencies are supported. See [\_](#jobsnameenvironmentsspec). - -::: - - -### jobs._name_.environments.spec - -**`Type: Map`** - -The environment entity used to preserve serverless environment side panel, jobs' environment for non-notebook task, and DLT's environment for classic and serverless pipelines. -In this minimal environment spec, only pip dependencies are supported. - - - -:::list-table - -- - Key - - Type - - Description - -- - `client` - - String - - This field is deprecated - -- - `dependencies` - - Sequence - - List of pip dependencies, as supported by the version of pip in this environment. - -- - `environment_version` - - String - - Required. Environment version used by the environment. Each version comes with a specific Python version and a set of Python packages. The version is a string, consisting of an integer. - -::: - - -### jobs._name_.git_source - -**`Type: Map`** - -An optional specification for a remote Git repository containing the source code used by tasks. Version-controlled source code is supported by notebook, dbt, Python script, and SQL File tasks. - -If `git_source` is set, these tasks retrieve the file from the remote repository by default. However, this behavior can be overridden by setting `source` to `WORKSPACE` on the task. - -Note: dbt and SQL File tasks support only version-controlled sources. If dbt or SQL File tasks are used, `git_source` must be defined on the job. - - - -:::list-table - -- - Key - - Type - - Description - -- - `git_branch` - - String - - Name of the branch to be checked out and used by this job. 
This field cannot be specified in conjunction with git_tag or git_commit. - -- - `git_commit` - - String - - Commit to be checked out and used by this job. This field cannot be specified in conjunction with git_branch or git_tag. - -- - `git_provider` - - String - - Unique identifier of the service used to host the Git repository. The value is case insensitive. - -- - `git_snapshot` - - Map - - Read-only state of the remote repository at the time the job was run. This field is only included on job runs. See [\_](#jobsnamegit_sourcegit_snapshot). - -- - `git_tag` - - String - - Name of the tag to be checked out and used by this job. This field cannot be specified in conjunction with git_branch or git_commit. - -- - `git_url` - - String - - URL of the repository to be cloned by this job. - -::: - - -### jobs._name_.git_source.git_snapshot - -**`Type: Map`** - -Read-only state of the remote repository at the time the job was run. This field is only included on job runs. - - - -:::list-table - -- - Key - - Type - - Description - -- - `used_commit` - - String - - Commit that was used to execute the run. If git_branch was specified, this points to the HEAD of the branch at the time of the run; if git_tag was specified, this points to the commit the tag points to. - -::: - - -### jobs._name_.health - -**`Type: Map`** - -An optional set of health rules that can be defined for this job. - - - -:::list-table - -- - Key - - Type - - Description - -- - `rules` - - Sequence - - See [\_](#jobsnamehealthrules). - -::: - - -### jobs._name_.health.rules - -**`Type: Sequence`** - - - - - -:::list-table - -- - Key - - Type - - Description - -- - `metric` - - String - - Specifies the health metric that is being evaluated for a particular health rule. * `RUN_DURATION_SECONDS`: Expected total time for a run in seconds. * `STREAMING_BACKLOG_BYTES`: An estimate of the maximum bytes of data waiting to be consumed across all streams. This metric is in Public Preview. 
* `STREAMING_BACKLOG_RECORDS`: An estimate of the maximum offset lag across all streams. This metric is in Public Preview. * `STREAMING_BACKLOG_SECONDS`: An estimate of the maximum consumer delay across all streams. This metric is in Public Preview. * `STREAMING_BACKLOG_FILES`: An estimate of the maximum number of outstanding files across all streams. This metric is in Public Preview. - -- - `op` - - String - - Specifies the operator used to compare the health metric value with the specified threshold. - -- - `value` - - Integer - - Specifies the threshold value that the health metric should obey to satisfy the health rule. - -::: - - -### jobs._name_.job_clusters - -**`Type: Sequence`** - -A list of job cluster specifications that can be shared and reused by tasks of this job. Libraries cannot be declared in a shared job cluster. You must declare dependent libraries in task settings. - - - -:::list-table - -- - Key - - Type - - Description - -- - `job_cluster_key` - - String - - A unique name for the job cluster. This field is required and must be unique within the job. `JobTaskSettings` may refer to this field to determine which cluster to launch for the task execution. - -- - `new_cluster` - - Map - - If new_cluster, a description of a cluster that is created for each task. See [\_](#jobsnamejob_clustersnew_cluster). - -::: - - -### jobs._name_.job_clusters.new_cluster - -**`Type: Map`** - -If new_cluster, a description of a cluster that is created for each task. - - - -:::list-table - -- - Key - - Type - - Description - -- - `apply_policy_default_values` - - Boolean - - When set to true, fixed and default values from the policy will be used for fields that are omitted. When set to false, only fixed values from the policy will be applied. - -- - `autoscale` - - Map - - Parameters needed in order to automatically scale clusters up and down based on load. Note: autoscaling works best with DB runtime versions 3.0 or later. 
See [\_](#jobsnamejob_clustersnew_clusterautoscale). - -- - `autotermination_minutes` - - Integer - - Automatically terminates the cluster after it is inactive for this time in minutes. If not set, this cluster will not be automatically terminated. If specified, the threshold must be between 10 and 10000 minutes. Users can also set this value to 0 to explicitly disable automatic termination. - -- - `aws_attributes` - - Map - - Attributes related to clusters running on Amazon Web Services. If not specified at cluster creation, a set of default values will be used. See [\_](#jobsnamejob_clustersnew_clusteraws_attributes). - -- - `azure_attributes` - - Map - - Attributes related to clusters running on Microsoft Azure. If not specified at cluster creation, a set of default values will be used. See [\_](#jobsnamejob_clustersnew_clusterazure_attributes). - -- - `cluster_log_conf` - - Map - - The configuration for delivering spark logs to a long-term storage destination. Three kinds of destinations (DBFS, S3 and Unity Catalog volumes) are supported. Only one destination can be specified for one cluster. If the conf is given, the logs will be delivered to the destination every `5 mins`. The destination of driver logs is `$destination/$clusterId/driver`, while the destination of executor logs is `$destination/$clusterId/executor`. See [\_](#jobsnamejob_clustersnew_clustercluster_log_conf). - -- - `cluster_name` - - String - - Cluster name requested by the user. This doesn't have to be unique. If not specified at creation, the cluster name will be an empty string. For job clusters, the cluster name is automatically set based on the job and job run IDs. - -- - `custom_tags` - - Map - - Additional tags for cluster resources. Databricks will tag all cluster resources (e.g., AWS instances and EBS volumes) with these tags in addition to `default_tags`. 
Notes: - Currently, Databricks allows at most 45 custom tags - Clusters can only reuse cloud resources if the resources' tags are a subset of the cluster tags - -- - `data_security_mode` - - String - - Data security mode decides what data governance model to use when accessing data from a cluster. The following modes can only be used when `kind = CLASSIC_PREVIEW`. * `DATA_SECURITY_MODE_AUTO`: Databricks will choose the most appropriate access mode depending on your compute configuration. * `DATA_SECURITY_MODE_STANDARD`: Alias for `USER_ISOLATION`. * `DATA_SECURITY_MODE_DEDICATED`: Alias for `SINGLE_USER`. The following modes can be used regardless of `kind`. * `NONE`: No security isolation for multiple users sharing the cluster. Data governance features are not available in this mode. * `SINGLE_USER`: A secure cluster that can only be exclusively used by a single user specified in `single_user_name`. Most programming languages, cluster features and data governance features are available in this mode. * `USER_ISOLATION`: A secure cluster that can be shared by multiple users. Cluster users are fully isolated so that they cannot see each other's data and credentials. Most data governance features are supported in this mode. But programming languages and cluster features might be limited. The following modes are deprecated starting with Databricks Runtime 15.0 and will be removed for future Databricks Runtime versions: * `LEGACY_TABLE_ACL`: This mode is for users migrating from legacy Table ACL clusters. * `LEGACY_PASSTHROUGH`: This mode is for users migrating from legacy Passthrough on high concurrency clusters. * `LEGACY_SINGLE_USER`: This mode is for users migrating from legacy Passthrough on standard clusters. * `LEGACY_SINGLE_USER_STANDARD`: This mode provides a way that doesn’t have UC nor passthrough enabled. - -- - `docker_image` - - Map - - See [\_](#jobsnamejob_clustersnew_clusterdocker_image). 
- -- - `driver_instance_pool_id` - - String - - The optional ID of the instance pool for the driver of the cluster belongs. The pool cluster uses the instance pool with id (instance_pool_id) if the driver pool is not assigned. - -- - `driver_node_type_id` - - String - - The node type of the Spark driver. Note that this field is optional; if unset, the driver node type will be set as the same value as `node_type_id` defined above. This field, along with node_type_id, should not be set if virtual_cluster_size is set. If both driver_node_type_id, node_type_id, and virtual_cluster_size are specified, driver_node_type_id and node_type_id take precedence. - -- - `enable_elastic_disk` - - Boolean - - Autoscaling Local Storage: when enabled, this cluster will dynamically acquire additional disk space when its Spark workers are running low on disk space. This feature requires specific AWS permissions to function correctly - refer to the User Guide for more details. - -- - `enable_local_disk_encryption` - - Boolean - - Whether to enable LUKS on cluster VMs' local disks - -- - `gcp_attributes` - - Map - - Attributes related to clusters running on Google Cloud Platform. If not specified at cluster creation, a set of default values will be used. See [\_](#jobsnamejob_clustersnew_clustergcp_attributes). - -- - `init_scripts` - - Sequence - - The configuration for storing init scripts. Any number of destinations can be specified. The scripts are executed sequentially in the order provided. If `cluster_log_conf` is specified, init script logs are sent to `//init_scripts`. See [\_](#jobsnamejob_clustersnew_clusterinit_scripts). - -- - `instance_pool_id` - - String - - The optional ID of the instance pool to which the cluster belongs. - -- - `is_single_node` - - Boolean - - This field can only be used when `kind = CLASSIC_PREVIEW`. 
When set to true, Databricks will automatically set single node related `custom_tags`, `spark_conf`, and `num_workers` - -- - `kind` - - String - - - -- - `node_type_id` - - String - - This field encodes, through a single value, the resources available to each of the Spark nodes in this cluster. For example, the Spark nodes can be provisioned and optimized for memory or compute intensive workloads. A list of available node types can be retrieved by using the :method:clusters/listNodeTypes API call. - -- - `num_workers` - - Integer - - Number of worker nodes that this cluster should have. A cluster has one Spark Driver and `num_workers` Executors for a total of `num_workers` + 1 Spark nodes. Note: When reading the properties of a cluster, this field reflects the desired number of workers rather than the actual current number of workers. For instance, if a cluster is resized from 5 to 10 workers, this field will immediately be updated to reflect the target size of 10 workers, whereas the workers listed in `spark_info` will gradually increase from 5 to 10 as the new nodes are provisioned. - -- - `policy_id` - - String - - The ID of the cluster policy used to create the cluster if applicable. - -- - `remote_disk_throughput` - - Integer - - If set, what the configurable throughput (in Mb/s) for the remote disk is. Currently only supported for GCP HYPERDISK_BALANCED disks. - -- - `runtime_engine` - - String - - - -- - `single_user_name` - - String - - Single user name if data_security_mode is `SINGLE_USER` - -- - `spark_conf` - - Map - - An object containing a set of optional, user-specified Spark configuration key-value pairs. Users can also pass in a string of extra JVM options to the driver and the executors via `spark.driver.extraJavaOptions` and `spark.executor.extraJavaOptions` respectively. - -- - `spark_env_vars` - - Map - - An object containing a set of optional, user-specified environment variable key-value pairs. 
Please note that key-value pair of the form (X,Y) will be exported as is (i.e., `export X='Y'`) while launching the driver and workers. In order to specify an additional set of `SPARK_DAEMON_JAVA_OPTS`, we recommend appending them to `$SPARK_DAEMON_JAVA_OPTS` as shown in the example below. This ensures that all default databricks managed environmental variables are included as well. Example Spark environment variables: `{"SPARK_WORKER_MEMORY": "28000m", "SPARK_LOCAL_DIRS": "/local_disk0"}` or `{"SPARK_DAEMON_JAVA_OPTS": "$SPARK_DAEMON_JAVA_OPTS -Dspark.shuffle.service.enabled=true"}` - -- - `spark_version` - - String - - The Spark version of the cluster, e.g. `3.3.x-scala2.11`. A list of available Spark versions can be retrieved by using the :method:clusters/sparkVersions API call. - -- - `ssh_public_keys` - - Sequence - - SSH public key contents that will be added to each Spark node in this cluster. The corresponding private keys can be used to login with the user name `ubuntu` on port `2200`. Up to 10 keys can be specified. - -- - `total_initial_remote_disk_size` - - Integer - - If set, what the total initial volume size (in GB) of the remote disks should be. Currently only supported for GCP HYPERDISK_BALANCED disks. - -- - `use_ml_runtime` - - Boolean - - This field can only be used when `kind = CLASSIC_PREVIEW`. `effective_spark_version` is determined by `spark_version` (DBR release), this field `use_ml_runtime`, and whether `node_type_id` is gpu node or not. - -- - `workload_type` - - Map - - Cluster Attributes showing for clusters workload types. See [\_](#jobsnamejob_clustersnew_clusterworkload_type). - -::: - - -### jobs._name_.job_clusters.new_cluster.autoscale - -**`Type: Map`** - -Parameters needed in order to automatically scale clusters up and down based on load. -Note: autoscaling works best with DB runtime versions 3.0 or later. 
- - - -:::list-table - -- - Key - - Type - - Description - -- - `max_workers` - - Integer - - The maximum number of workers to which the cluster can scale up when overloaded. Note that `max_workers` must be strictly greater than `min_workers`. - -- - `min_workers` - - Integer - - The minimum number of workers to which the cluster can scale down when underutilized. It is also the initial number of workers the cluster will have after creation. - -::: - - -### jobs._name_.job_clusters.new_cluster.aws_attributes - -**`Type: Map`** - -Attributes related to clusters running on Amazon Web Services. -If not specified at cluster creation, a set of default values will be used. - - - -:::list-table - -- - Key - - Type - - Description - -- - `availability` - - String - - Availability type used for all subsequent nodes past the `first_on_demand` ones. Note: If `first_on_demand` is zero, this availability type will be used for the entire cluster. - -- - `ebs_volume_count` - - Integer - - The number of volumes launched for each instance. Users can choose up to 10 volumes. This feature is only enabled for supported node types. Legacy node types cannot specify custom EBS volumes. For node types with no instance store, at least one EBS volume needs to be specified; otherwise, cluster creation will fail. These EBS volumes will be mounted at `/ebs0`, `/ebs1`, and etc. Instance store volumes will be mounted at `/local_disk0`, `/local_disk1`, and etc. If EBS volumes are attached, Databricks will configure Spark to use only the EBS volumes for scratch storage because heterogenously sized scratch devices can lead to inefficient disk utilization. If no EBS volumes are attached, Databricks will configure Spark to use instance store volumes. Please note that if EBS volumes are specified, then the Spark configuration `spark.local.dir` will be overridden. - -- - `ebs_volume_iops` - - Integer - - If using gp3 volumes, what IOPS to use for the disk. 
If this is not set, the maximum performance of a gp2 volume with the same volume size will be used. - -- - `ebs_volume_size` - - Integer - - The size of each EBS volume (in GiB) launched for each instance. For general purpose SSD, this value must be within the range 100 - 4096. For throughput optimized HDD, this value must be within the range 500 - 4096. - -- - `ebs_volume_throughput` - - Integer - - If using gp3 volumes, what throughput to use for the disk. If this is not set, the maximum performance of a gp2 volume with the same volume size will be used. - -- - `ebs_volume_type` - - String - - All EBS volume types that Databricks supports. See https://aws.amazon.com/ebs/details/ for details. - -- - `first_on_demand` - - Integer - - The first `first_on_demand` nodes of the cluster will be placed on on-demand instances. If this value is greater than 0, the cluster driver node in particular will be placed on an on-demand instance. If this value is greater than or equal to the current cluster size, all nodes will be placed on on-demand instances. If this value is less than the current cluster size, `first_on_demand` nodes will be placed on on-demand instances and the remainder will be placed on `availability` instances. Note that this value does not affect cluster size and cannot currently be mutated over the lifetime of a cluster. - -- - `instance_profile_arn` - - String - - Nodes for this cluster will only be placed on AWS instances with this instance profile. If omitted, nodes will be placed on instances without an IAM instance profile. The instance profile must have previously been added to the Databricks environment by an account administrator. This feature may only be available to certain customer plans. - -- - `spot_bid_price_percent` - - Integer - - The bid price for AWS spot instances, as a percentage of the corresponding instance type's on-demand price.
For example, if this field is set to 50, and the cluster needs a new `r3.xlarge` spot instance, then the bid price is half of the price of on-demand `r3.xlarge` instances. Similarly, if this field is set to 200, the bid price is twice the price of on-demand `r3.xlarge` instances. If not specified, the default value is 100. When spot instances are requested for this cluster, only spot instances whose bid price percentage matches this field will be considered. Note that, for safety, we enforce this field to be no more than 10000. - -- - `zone_id` - - String - - Identifier for the availability zone/datacenter in which the cluster resides. This string will be of a form like "us-west-2a". The provided availability zone must be in the same region as the Databricks deployment. For example, "us-west-2a" is not a valid zone id if the Databricks deployment resides in the "us-east-1" region. This is an optional field at cluster creation, and if not specified, a default zone will be used. If the zone specified is "auto", will try to place cluster in a zone with high availability, and will retry placement in a different AZ if there is not enough capacity. The list of available zones as well as the default value can be found by using the `List Zones` method. - -::: - - -### jobs._name_.job_clusters.new_cluster.azure_attributes - -**`Type: Map`** - -Attributes related to clusters running on Microsoft Azure. -If not specified at cluster creation, a set of default values will be used. - - - -:::list-table - -- - Key - - Type - - Description - -- - `availability` - - String - - Availability type used for all subsequent nodes past the `first_on_demand` ones. Note: If `first_on_demand` is zero, this availability type will be used for the entire cluster. - -- - `first_on_demand` - - Integer - - The first `first_on_demand` nodes of the cluster will be placed on on-demand instances. 
This value should be greater than 0, to make sure the cluster driver node is placed on an on-demand instance. If this value is greater than or equal to the current cluster size, all nodes will be placed on on-demand instances. If this value is less than the current cluster size, `first_on_demand` nodes will be placed on on-demand instances and the remainder will be placed on `availability` instances. Note that this value does not affect cluster size and cannot currently be mutated over the lifetime of a cluster. - -- - `log_analytics_info` - - Map - - Defines values necessary to configure and run Azure Log Analytics agent. See [\_](#jobsnamejob_clustersnew_clusterazure_attributeslog_analytics_info). - -- - `spot_bid_max_price` - - Any - - The max bid price to be used for Azure spot instances. The Max price for the bid cannot be higher than the on-demand price of the instance. If not specified, the default value is -1, which specifies that the instance cannot be evicted on the basis of price, and only on the basis of availability. Further, the value should be > 0 or -1. - -::: - - -### jobs._name_.job_clusters.new_cluster.azure_attributes.log_analytics_info - -**`Type: Map`** - -Defines values necessary to configure and run Azure Log Analytics agent - - - -:::list-table - -- - Key - - Type - - Description - -- - `log_analytics_primary_key` - - String - - The primary key for the Azure Log Analytics agent configuration - -- - `log_analytics_workspace_id` - - String - - The workspace ID for the Azure Log Analytics agent configuration - -::: - - -### jobs._name_.job_clusters.new_cluster.cluster_log_conf - -**`Type: Map`** - -The configuration for delivering spark logs to a long-term storage destination. -Three kinds of destinations (DBFS, S3 and Unity Catalog volumes) are supported. Only one destination can be specified -for one cluster. If the conf is given, the logs will be delivered to the destination every -`5 mins`.
The destination of driver logs is `$destination/$clusterId/driver`, while -the destination of executor logs is `$destination/$clusterId/executor`. - - - -:::list-table - -- - Key - - Type - - Description - -- - `dbfs` - - Map - - destination needs to be provided. e.g. `{ "dbfs" : { "destination" : "dbfs:/home/cluster_log" } }`. See [\_](#jobsnamejob_clustersnew_clustercluster_log_confdbfs). - -- - `s3` - - Map - - destination and either the region or endpoint need to be provided. e.g. `{ "s3": { "destination" : "s3://cluster_log_bucket/prefix", "region" : "us-west-2" } }` Cluster iam role is used to access s3, please make sure the cluster iam role in `instance_profile_arn` has permission to write data to the s3 destination. See [\_](#jobsnamejob_clustersnew_clustercluster_log_confs3). - -- - `volumes` - - Map - - destination needs to be provided, e.g. `{ "volumes": { "destination": "/Volumes/catalog/schema/volume/cluster_log" } }`. See [\_](#jobsnamejob_clustersnew_clustercluster_log_confvolumes). - -::: - - -### jobs._name_.job_clusters.new_cluster.cluster_log_conf.dbfs - -**`Type: Map`** - -destination needs to be provided. e.g. -`{ "dbfs" : { "destination" : "dbfs:/home/cluster_log" } }` - - - -:::list-table - -- - Key - - Type - - Description - -- - `destination` - - String - - dbfs destination, e.g. `dbfs:/my/path` - -::: - - -### jobs._name_.job_clusters.new_cluster.cluster_log_conf.s3 - -**`Type: Map`** - -destination and either the region or endpoint need to be provided. e.g. -`{ "s3": { "destination" : "s3://cluster_log_bucket/prefix", "region" : "us-west-2" } }` -Cluster iam role is used to access s3, please make sure the cluster iam role in -`instance_profile_arn` has permission to write data to the s3 destination. - - - -:::list-table - -- - Key - - Type - - Description - -- - `canned_acl` - - String - - (Optional) Set canned access control list for the logs, e.g. `bucket-owner-full-control`. 
If `canned_acl` is set, please make sure the cluster iam role has `s3:PutObjectAcl` permission on the destination bucket and prefix. The full list of possible canned acl can be found at http://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl. Please also note that by default only the object owner gets full controls. If you are using cross account role for writing data, you may want to set `bucket-owner-full-control` to make bucket owner able to read the logs. - -- - `destination` - - String - - S3 destination, e.g. `s3://my-bucket/some-prefix` Note that logs will be delivered using cluster iam role, please make sure you set cluster iam role and the role has write access to the destination. Please also note that you cannot use AWS keys to deliver logs. - -- - `enable_encryption` - - Boolean - - (Optional) Flag to enable server side encryption, `false` by default. - -- - `encryption_type` - - String - - (Optional) The encryption type, it could be `sse-s3` or `sse-kms`. It will be used only when encryption is enabled and the default type is `sse-s3`. - -- - `endpoint` - - String - - S3 endpoint, e.g. `https://s3-us-west-2.amazonaws.com`. Either region or endpoint needs to be set. If both are set, endpoint will be used. - -- - `kms_key` - - String - - (Optional) Kms key which will be used if encryption is enabled and encryption type is set to `sse-kms`. - -- - `region` - - String - - S3 region, e.g. `us-west-2`. Either region or endpoint needs to be set. If both are set, endpoint will be used. - -::: - - -### jobs._name_.job_clusters.new_cluster.cluster_log_conf.volumes - -**`Type: Map`** - -destination needs to be provided, e.g. -`{ "volumes": { "destination": "/Volumes/catalog/schema/volume/cluster_log" } }` - - - -:::list-table - -- - Key - - Type - - Description - -- - `destination` - - String - - UC Volumes destination, e.g.
`/Volumes/catalog/schema/vol1/init-scripts/setup-datadog.sh` or `dbfs:/Volumes/catalog/schema/vol1/init-scripts/setup-datadog.sh` - -::: - - -### jobs._name_.job_clusters.new_cluster.docker_image - -**`Type: Map`** - - - - - -:::list-table - -- - Key - - Type - - Description - -- - `basic_auth` - - Map - - See [\_](#jobsnamejob_clustersnew_clusterdocker_imagebasic_auth). - -- - `url` - - String - - URL of the docker image. - -::: - - -### jobs._name_.job_clusters.new_cluster.docker_image.basic_auth - -**`Type: Map`** - - - - - -:::list-table - -- - Key - - Type - - Description - -- - `password` - - String - - Password of the user - -- - `username` - - String - - Name of the user - -::: - - -### jobs._name_.job_clusters.new_cluster.gcp_attributes - -**`Type: Map`** - -Attributes related to clusters running on Google Cloud Platform. -If not specified at cluster creation, a set of default values will be used. - - - -:::list-table - -- - Key - - Type - - Description - -- - `availability` - - String - - This field determines whether the instance pool will contain preemptible VMs, on-demand VMs, or preemptible VMs with a fallback to on-demand VMs if the former is unavailable. - -- - `boot_disk_size` - - Integer - - Boot disk size in GB - -- - `first_on_demand` - - Integer - - The first `first_on_demand` nodes of the cluster will be placed on on-demand instances. This value should be greater than 0, to make sure the cluster driver node is placed on an on-demand instance. If this value is greater than or equal to the current cluster size, all nodes will be placed on on-demand instances. If this value is less than the current cluster size, `first_on_demand` nodes will be placed on on-demand instances and the remainder will be placed on `availability` instances. Note that this value does not affect cluster size and cannot currently be mutated over the lifetime of a cluster. 
- -- - `google_service_account` - - String - - If provided, the cluster will impersonate the google service account when accessing gcloud services (like GCS). The google service account must have previously been added to the Databricks environment by an account administrator. - -- - `local_ssd_count` - - Integer - - If provided, each node (workers and driver) in the cluster will have this number of local SSDs attached. Each local SSD is 375GB in size. Refer to [GCP documentation](https://cloud.google.com/compute/docs/disks/local-ssd#choose_number_local_ssds) for the supported number of local SSDs for each instance type. - -- - `use_preemptible_executors` - - Boolean - - This field is deprecated - -- - `zone_id` - - String - - Identifier for the availability zone in which the cluster resides. This can be one of the following: - "HA" => High availability, spread nodes across availability zones for a Databricks deployment region [default]. - "AUTO" => Databricks picks an availability zone to schedule the cluster on. - A GCP availability zone => Pick One of the available zones for (machine type + region) from https://cloud.google.com/compute/docs/regions-zones. - -::: - - -### jobs._name_.job_clusters.new_cluster.init_scripts - -**`Type: Sequence`** - -The configuration for storing init scripts. Any number of destinations can be specified. -The scripts are executed sequentially in the order provided. -If `cluster_log_conf` is specified, init script logs are sent to `//init_scripts`. - - - -:::list-table - -- - Key - - Type - - Description - -- - `abfss` - - Map - - Contains the Azure Data Lake Storage destination path. See [\_](#jobsnamejob_clustersnew_clusterinit_scriptsabfss). - -- - `dbfs` - - Map - - This field is deprecated - -- - `file` - - Map - - destination needs to be provided, e.g. `{ "file": { "destination": "file:/my/local/file.sh" } }`. See [\_](#jobsnamejob_clustersnew_clusterinit_scriptsfile). 
- -- - `gcs` - - Map - - destination needs to be provided, e.g. `{ "gcs": { "destination": "gs://my-bucket/file.sh" } }`. See [\_](#jobsnamejob_clustersnew_clusterinit_scriptsgcs). - -- - `s3` - - Map - - destination and either the region or endpoint need to be provided. e.g. `{ \"s3\": { \"destination\": \"s3://cluster_log_bucket/prefix\", \"region\": \"us-west-2\" } }` Cluster iam role is used to access s3, please make sure the cluster iam role in `instance_profile_arn` has permission to write data to the s3 destination. See [\_](#jobsnamejob_clustersnew_clusterinit_scriptss3). - -- - `volumes` - - Map - - destination needs to be provided. e.g. `{ \"volumes\" : { \"destination\" : \"/Volumes/my-init.sh\" } }`. See [\_](#jobsnamejob_clustersnew_clusterinit_scriptsvolumes). - -- - `workspace` - - Map - - destination needs to be provided, e.g. `{ "workspace": { "destination": "/cluster-init-scripts/setup-datadog.sh" } }`. See [\_](#jobsnamejob_clustersnew_clusterinit_scriptsworkspace). - -::: - - -### jobs._name_.job_clusters.new_cluster.init_scripts.abfss - -**`Type: Map`** - -Contains the Azure Data Lake Storage destination path - - - -:::list-table - -- - Key - - Type - - Description - -- - `destination` - - String - - abfss destination, e.g. `abfss://@.dfs.core.windows.net/`. - -::: - - -### jobs._name_.job_clusters.new_cluster.init_scripts.file - -**`Type: Map`** - -destination needs to be provided, e.g. -`{ "file": { "destination": "file:/my/local/file.sh" } }` - - - -:::list-table - -- - Key - - Type - - Description - -- - `destination` - - String - - local file destination, e.g. `file:/my/local/file.sh` - -::: - - -### jobs._name_.job_clusters.new_cluster.init_scripts.gcs - -**`Type: Map`** - -destination needs to be provided, e.g. -`{ "gcs": { "destination": "gs://my-bucket/file.sh" } }` - - - -:::list-table - -- - Key - - Type - - Description - -- - `destination` - - String - - GCS destination/URI, e.g. 
`gs://my-bucket/some-prefix` - -::: - - -### jobs._name_.job_clusters.new_cluster.init_scripts.s3 - -**`Type: Map`** - -destination and either the region or endpoint need to be provided. e.g. -`{ "s3": { "destination": "s3://cluster_log_bucket/prefix", "region": "us-west-2" } }` -Cluster iam role is used to access s3, please make sure the cluster iam role in -`instance_profile_arn` has permission to write data to the s3 destination. - - - -:::list-table - -- - Key - - Type - - Description - -- - `canned_acl` - - String - - (Optional) Set canned access control list for the logs, e.g. `bucket-owner-full-control`. If `canned_acl` is set, please make sure the cluster iam role has `s3:PutObjectAcl` permission on the destination bucket and prefix. The full list of possible canned acl can be found at http://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl. Please also note that by default only the object owner gets full controls. If you are using cross account role for writing data, you may want to set `bucket-owner-full-control` to make bucket owner able to read the logs. - -- - `destination` - - String - - S3 destination, e.g. `s3://my-bucket/some-prefix` Note that logs will be delivered using cluster iam role, please make sure you set cluster iam role and the role has write access to the destination. Please also note that you cannot use AWS keys to deliver logs. - -- - `enable_encryption` - - Boolean - - (Optional) Flag to enable server side encryption, `false` by default. - -- - `encryption_type` - - String - - (Optional) The encryption type, it could be `sse-s3` or `sse-kms`. It will be used only when encryption is enabled and the default type is `sse-s3`. - -- - `endpoint` - - String - - S3 endpoint, e.g. `https://s3-us-west-2.amazonaws.com`. Either region or endpoint needs to be set. If both are set, endpoint will be used.
- -- - `kms_key` - - String - - (Optional) Kms key which will be used if encryption is enabled and encryption type is set to `sse-kms`. - -- - `region` - - String - - S3 region, e.g. `us-west-2`. Either region or endpoint needs to be set. If both are set, endpoint will be used. - -::: - - -### jobs._name_.job_clusters.new_cluster.init_scripts.volumes - -**`Type: Map`** - -destination needs to be provided. e.g. -`{ \"volumes\" : { \"destination\" : \"/Volumes/my-init.sh\" } }` - - - -:::list-table - -- - Key - - Type - - Description - -- - `destination` - - String - - UC Volumes destination, e.g. `/Volumes/catalog/schema/vol1/init-scripts/setup-datadog.sh` or `dbfs:/Volumes/catalog/schema/vol1/init-scripts/setup-datadog.sh` - -::: - - -### jobs._name_.job_clusters.new_cluster.init_scripts.workspace - -**`Type: Map`** - -destination needs to be provided, e.g. -`{ "workspace": { "destination": "/cluster-init-scripts/setup-datadog.sh" } }` - - - -:::list-table - -- - Key - - Type - - Description - -- - `destination` - - String - - wsfs destination, e.g. `workspace:/cluster-init-scripts/setup-datadog.sh` - -::: - - -### jobs._name_.job_clusters.new_cluster.workload_type - -**`Type: Map`** - -Cluster Attributes showing for clusters workload types. - - - -:::list-table - -- - Key - - Type - - Description - -- - `clients` - - Map - - defined what type of clients can use the cluster. E.g. Notebooks, Jobs. See [\_](#jobsnamejob_clustersnew_clusterworkload_typeclients). - -::: - - -### jobs._name_.job_clusters.new_cluster.workload_type.clients - -**`Type: Map`** - -defined what type of clients can use the cluster. E.g. 
Notebooks, Jobs - - - -:::list-table - -- - Key - - Type - - Description - -- - `jobs` - - Boolean - - With jobs set, the cluster can be used for jobs - -- - `notebooks` - - Boolean - - With notebooks set, this cluster can be used for notebooks - -::: - - -### jobs._name_.lifecycle - -**`Type: Map`** - -Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. - - - -:::list-table - -- - Key - - Type - - Description - -- - `prevent_destroy` - - Boolean - - Lifecycle setting to prevent the resource from being destroyed. - -::: - - -### jobs._name_.notification_settings - -**`Type: Map`** - -Optional notification settings that are used when sending notifications to each of the `email_notifications` and `webhook_notifications` for this job. - - - -:::list-table - -- - Key - - Type - - Description - -- - `no_alert_for_canceled_runs` - - Boolean - - If true, do not send notifications to recipients specified in `on_failure` if the run is canceled. - -- - `no_alert_for_skipped_runs` - - Boolean - - If true, do not send notifications to recipients specified in `on_failure` if the run is skipped. - -::: - - -### jobs._name_.parameters - -**`Type: Sequence`** - -Job-level parameter definitions - - - -:::list-table - -- - Key - - Type - - Description - -- - `default` - - String - - Default value of the parameter. - -- - `name` - - String - - The name of the defined parameter. May only contain alphanumeric characters, `_`, `-`, and `.` - -::: - - -### jobs._name_.permissions - -**`Type: Sequence`** - - - - - -:::list-table - -- - Key - - Type - - Description - -- - `group_name` - - String - - - -- - `level` - - String - - - -- - `service_principal_name` - - String - - - -- - `user_name` - - String - - - -::: - - -### jobs._name_.queue - -**`Type: Map`** - -The queue settings of the job. 
- - - -:::list-table - -- - Key - - Type - - Description - -- - `enabled` - - Boolean - - If true, enable queueing for the job. This is a required field. - -::: - - -### jobs._name_.run_as - -**`Type: Map`** - -Write-only setting. Specifies the user or service principal that the job runs as. If not specified, the job runs as the user who created the job. - -Either `user_name` or `service_principal_name` should be specified. If not, an error is thrown. - - - -:::list-table - -- - Key - - Type - - Description - -- - `service_principal_name` - - String - - The application ID of an active service principal. Setting this field requires the `servicePrincipal/user` role. - -- - `user_name` - - String - - The email of an active workspace user. Non-admin users can only set this field to their own email. - -::: - - -### jobs._name_.schedule - -**`Type: Map`** - -An optional periodic schedule for this job. The default behavior is that the job only runs when triggered by clicking “Run Now” in the Jobs UI or sending an API request to `runNow`. - - - -:::list-table - -- - Key - - Type - - Description - -- - `pause_status` - - String - - Indicate whether this schedule is paused or not. - -- - `quartz_cron_expression` - - String - - A Cron expression using Quartz syntax that describes the schedule for a job. See [Cron Trigger](http://www.quartz-scheduler.org/documentation/quartz-2.3.0/tutorials/crontrigger.html) for details. This field is required. - -- - `timezone_id` - - String - - A Java timezone ID. The schedule for a job is resolved with respect to this timezone. See [Java TimeZone](https://docs.oracle.com/javase/7/docs/api/java/util/TimeZone.html) for details. This field is required. - -::: - - -### jobs._name_.tasks - -**`Type: Sequence`** - -A list of task specifications to be executed by this job. -It supports up to 1000 elements in write endpoints (:method:jobs/create, :method:jobs/reset, :method:jobs/update, :method:jobs/submit). -Read endpoints return only 100 tasks. 
If more than 100 tasks are available, you can paginate through them using :method:jobs/get. Use the `next_page_token` field at the object root to determine if more results are available. - - - -:::list-table - -- - Key - - Type - - Description - -- - `clean_rooms_notebook_task` - - Map - - The task runs a [clean rooms](https://docs.databricks.com/en/clean-rooms/index.html) notebook when the `clean_rooms_notebook_task` field is present. See [\_](#jobsnametasksclean_rooms_notebook_task). - -- - `condition_task` - - Map - - The task evaluates a condition that can be used to control the execution of other tasks when the `condition_task` field is present. The condition task does not require a cluster to execute and does not support retries or notifications. See [\_](#jobsnametaskscondition_task). - -- - `dashboard_task` - - Map - - The task refreshes a dashboard and sends a snapshot to subscribers. See [\_](#jobsnametasksdashboard_task). - -- - `dbt_task` - - Map - - The task runs one or more dbt commands when the `dbt_task` field is present. The dbt task requires both Databricks SQL and the ability to use a serverless or a pro SQL warehouse. See [\_](#jobsnametasksdbt_task). - -- - `depends_on` - - Sequence - - An optional array of objects specifying the dependency graph of the task. All tasks specified in this field must complete before executing this task. The task will run only if the `run_if` condition is true. The key is `task_key`, and the value is the name assigned to the dependent task. See [\_](#jobsnametasksdepends_on). - -- - `description` - - String - - An optional description for this task. - -- - `disable_auto_optimization` - - Boolean - - An option to disable auto optimization in serverless - -- - `email_notifications` - - Map - - An optional set of email addresses that is notified when runs of this task begin or complete as well as when this task is deleted. The default behavior is to not send any emails. See [\_](#jobsnametasksemail_notifications). 
- -- - `environment_key` - - String - - The key that references an environment spec in a job. This field is required for Python script, Python wheel and dbt tasks when using serverless compute. - -- - `existing_cluster_id` - - String - - If existing_cluster_id, the ID of an existing cluster that is used for all runs. When running jobs or tasks on an existing cluster, you may need to manually restart the cluster if it stops responding. We suggest running jobs and tasks on new clusters for greater reliability - -- - `for_each_task` - - Map - - The task executes a nested task for every input provided when the `for_each_task` field is present. See [\_](#jobsnametasksfor_each_task). - -- - `health` - - Map - - An optional set of health rules that can be defined for this job. See [\_](#jobsnametaskshealth). - -- - `job_cluster_key` - - String - - If job_cluster_key, this task is executed reusing the cluster specified in `job.settings.job_clusters`. - -- - `libraries` - - Sequence - - An optional list of libraries to be installed on the cluster. The default value is an empty list. See [\_](#jobsnametaskslibraries). - -- - `max_retries` - - Integer - - An optional maximum number of times to retry an unsuccessful run. A run is considered to be unsuccessful if it completes with the `FAILED` result_state or `INTERNAL_ERROR` `life_cycle_state`. The value `-1` means to retry indefinitely and the value `0` means to never retry. - -- - `min_retry_interval_millis` - - Integer - - An optional minimal interval in milliseconds between the start of the failed run and the subsequent retry run. The default behavior is that unsuccessful runs are immediately retried. - -- - `new_cluster` - - Map - - If new_cluster, a description of a new cluster that is created for each run. See [\_](#jobsnametasksnew_cluster). - -- - `notebook_task` - - Map - - The task runs a notebook when the `notebook_task` field is present. See [\_](#jobsnametasksnotebook_task). 
- -- - `notification_settings` - - Map - - Optional notification settings that are used when sending notifications to each of the `email_notifications` and `webhook_notifications` for this task. See [\_](#jobsnametasksnotification_settings). - -- - `pipeline_task` - - Map - - The task triggers a pipeline update when the `pipeline_task` field is present. Only pipelines configured to use triggered mode are supported. See [\_](#jobsnametaskspipeline_task). - -- - `power_bi_task` - - Map - - The task triggers a Power BI semantic model update when the `power_bi_task` field is present. See [\_](#jobsnametaskspower_bi_task). - -- - `python_wheel_task` - - Map - - The task runs a Python wheel when the `python_wheel_task` field is present. See [\_](#jobsnametaskspython_wheel_task). - -- - `retry_on_timeout` - - Boolean - - An optional policy to specify whether to retry a job when it times out. The default behavior is to not retry on timeout. - -- - `run_if` - - String - - An optional value specifying the condition determining whether the task is run once its dependencies have been completed. * `ALL_SUCCESS`: All dependencies have executed and succeeded * `AT_LEAST_ONE_SUCCESS`: At least one dependency has succeeded * `NONE_FAILED`: None of the dependencies have failed and at least one was executed * `ALL_DONE`: All dependencies have been completed * `AT_LEAST_ONE_FAILED`: At least one dependency failed * `ALL_FAILED`: All dependencies have failed - -- - `run_job_task` - - Map - - The task triggers another job when the `run_job_task` field is present. See [\_](#jobsnametasksrun_job_task). - -- - `spark_jar_task` - - Map - - The task runs a JAR when the `spark_jar_task` field is present. See [\_](#jobsnametasksspark_jar_task). - -- - `spark_python_task` - - Map - - The task runs a Python file when the `spark_python_task` field is present. See [\_](#jobsnametasksspark_python_task).
- -- - `spark_submit_task` - - Map - - (Legacy) The task runs the spark-submit script when the `spark_submit_task` field is present. This task can run only on new clusters and is not compatible with serverless compute. In the `new_cluster` specification, `libraries` and `spark_conf` are not supported. Instead, use `--jars` and `--py-files` to add Java and Python libraries and `--conf` to set the Spark configurations. `master`, `deploy-mode`, and `executor-cores` are automatically configured by Databricks; you _cannot_ specify them in parameters. By default, the Spark submit job uses all available memory (excluding reserved memory for Databricks services). You can set `--driver-memory`, and `--executor-memory` to a smaller value to leave some room for off-heap usage. The `--jars`, `--py-files`, `--files` arguments support DBFS and S3 paths. See [\_](#jobsnametasksspark_submit_task). - -- - `sql_task` - - Map - - The task runs a SQL query or file, or it refreshes a SQL alert or a legacy SQL dashboard when the `sql_task` field is present. See [\_](#jobsnametaskssql_task). - -- - `task_key` - - String - - A unique name for the task. This field is used to refer to this task from other tasks. This field is required and must be unique within its parent job. On Update or Reset, this field is used to reference the tasks to be updated or reset. - -- - `timeout_seconds` - - Integer - - An optional timeout applied to each run of this job task. A value of `0` means no timeout. - -- - `webhook_notifications` - - Map - - A collection of system notification IDs to notify when runs of this task begin or complete. The default behavior is to not send any system notifications. See [\_](#jobsnametaskswebhook_notifications). - -::: - - -### jobs._name_.tasks.clean_rooms_notebook_task - -**`Type: Map`** - -The task runs a [clean rooms](https://docs.databricks.com/en/clean-rooms/index.html) notebook -when the `clean_rooms_notebook_task` field is present. 
- - - -:::list-table - -- - Key - - Type - - Description - -- - `clean_room_name` - - String - - The clean room that the notebook belongs to. - -- - `etag` - - String - - Checksum to validate the freshness of the notebook resource (i.e. the notebook being run is the latest version). It can be fetched by calling the :method:cleanroomassets/get API. - -- - `notebook_base_parameters` - - Map - - Base parameters to be used for the clean room notebook job. - -- - `notebook_name` - - String - - Name of the notebook being run. - -::: - - -### jobs._name_.tasks.condition_task - -**`Type: Map`** - -The task evaluates a condition that can be used to control the execution of other tasks when the `condition_task` field is present. -The condition task does not require a cluster to execute and does not support retries or notifications. - - - -:::list-table - -- - Key - - Type - - Description - -- - `left` - - String - - The left operand of the condition task. Can be either a string value or a job state or parameter reference. - -- - `op` - - String - - * `EQUAL_TO`, `NOT_EQUAL` operators perform string comparison of their operands. This means that `“12.0” == “12”` will evaluate to `false`. * `GREATER_THAN`, `GREATER_THAN_OR_EQUAL`, `LESS_THAN`, `LESS_THAN_OR_EQUAL` operators perform numeric comparison of their operands. `“12.0” >= “12”` will evaluate to `true`, `“10.0” >= “12”` will evaluate to `false`. The boolean comparison to task values can be implemented with operators `EQUAL_TO`, `NOT_EQUAL`. If a task value was set to a boolean value, it will be serialized to `“true”` or `“false”` for the comparison. - -- - `right` - - String - - The right operand of the condition task. Can be either a string value or a job state or parameter reference. - -::: - - -### jobs._name_.tasks.dashboard_task - -**`Type: Map`** - -The task refreshes a dashboard and sends a snapshot to subscribers. 
- - - -:::list-table - -- - Key - - Type - - Description - -- - `dashboard_id` - - String - - - -- - `subscription` - - Map - - See [\_](#jobsnametasksdashboard_tasksubscription). - -- - `warehouse_id` - - String - - Optional: The warehouse id to execute the dashboard with for the schedule. If not specified, the default warehouse of the dashboard will be used. - -::: - - -### jobs._name_.tasks.dashboard_task.subscription - -**`Type: Map`** - - - - - -:::list-table - -- - Key - - Type - - Description - -- - `custom_subject` - - String - - Optional: Allows users to specify a custom subject line on the email sent to subscribers. - -- - `paused` - - Boolean - - When true, the subscription will not send emails. - -- - `subscribers` - - Sequence - - See [\_](#jobsnametasksdashboard_tasksubscriptionsubscribers). - -::: - - -### jobs._name_.tasks.dashboard_task.subscription.subscribers - -**`Type: Sequence`** - - - - - -:::list-table - -- - Key - - Type - - Description - -- - `destination_id` - - String - - - -- - `user_name` - - String - - - -::: - - -### jobs._name_.tasks.dbt_task - -**`Type: Map`** - -The task runs one or more dbt commands when the `dbt_task` field is present. The dbt task requires both Databricks SQL and the ability to use a serverless or a pro SQL warehouse. - - - -:::list-table - -- - Key - - Type - - Description - -- - `catalog` - - String - - Optional name of the catalog to use. The value is the top level in the 3-level namespace of Unity Catalog (catalog / schema / relation). The catalog value can only be specified if a warehouse_id is specified. Requires dbt-databricks >= 1.1.1. - -- - `commands` - - Sequence - - A list of dbt commands to execute. All commands must start with `dbt`. This parameter must not be empty. A maximum of up to 10 commands can be provided. - -- - `profiles_directory` - - String - - Optional (relative) path to the profiles directory. Can only be specified if no warehouse_id is specified. 
If no warehouse_id is specified and this folder is unset, the root directory is used. - -- - `project_directory` - - String - - Path to the project directory. Optional for Git sourced tasks, in which case if no value is provided, the root of the Git repository is used. - -- - `schema` - - String - - Optional schema to write to. This parameter is only used when a warehouse_id is also provided. If not provided, the `default` schema is used. - -- - `source` - - String - - Optional location type of the project directory. When set to `WORKSPACE`, the project will be retrieved from the local Databricks workspace. When set to `GIT`, the project will be retrieved from a Git repository defined in `git_source`. If the value is empty, the task will use `GIT` if `git_source` is defined and `WORKSPACE` otherwise. * `WORKSPACE`: Project is located in Databricks workspace. * `GIT`: Project is located in cloud Git provider. - -- - `warehouse_id` - - String - - ID of the SQL warehouse to connect to. If provided, we automatically generate and provide the profile and connection details to dbt. It can be overridden on a per-command basis by using the `--profiles-dir` command line argument. - -::: - - -### jobs._name_.tasks.depends_on - -**`Type: Sequence`** - -An optional array of objects specifying the dependency graph of the task. All tasks specified in this field must complete before executing this task. The task will run only if the `run_if` condition is true. -The key is `task_key`, and the value is the name assigned to the dependent task. - - - -:::list-table - -- - Key - - Type - - Description - -- - `outcome` - - String - - Can only be specified on condition task dependencies. The outcome of the dependent task that must be met for this task to run. - -- - `task_key` - - String - - The name of the task this task depends on. 
- -::: - - -### jobs._name_.tasks.email_notifications - -**`Type: Map`** - -An optional set of email addresses that is notified when runs of this task begin or complete as well as when this task is deleted. The default behavior is to not send any emails. - - - -:::list-table - -- - Key - - Type - - Description - -- - `no_alert_for_skipped_runs` - - Boolean - - This field is deprecated - -- - `on_duration_warning_threshold_exceeded` - - Sequence - - A list of email addresses to be notified when the duration of a run exceeds the threshold specified for the `RUN_DURATION_SECONDS` metric in the `health` field. If no rule for the `RUN_DURATION_SECONDS` metric is specified in the `health` field for the job, notifications are not sent. - -- - `on_failure` - - Sequence - - A list of email addresses to be notified when a run unsuccessfully completes. A run is considered to have completed unsuccessfully if it ends with an `INTERNAL_ERROR` `life_cycle_state` or a `FAILED`, or `TIMED_OUT` result_state. If this is not specified on job creation, reset, or update the list is empty, and notifications are not sent. - -- - `on_start` - - Sequence - - A list of email addresses to be notified when a run begins. If not specified on job creation, reset, or update, the list is empty, and notifications are not sent. - -- - `on_streaming_backlog_exceeded` - - Sequence - - A list of email addresses to notify when any streaming backlog thresholds are exceeded for any stream. Streaming backlog thresholds can be set in the `health` field using the following metrics: `STREAMING_BACKLOG_BYTES`, `STREAMING_BACKLOG_RECORDS`, `STREAMING_BACKLOG_SECONDS`, or `STREAMING_BACKLOG_FILES`. Alerting is based on the 10-minute average of these metrics. If the issue persists, notifications are resent every 30 minutes. - -- - `on_success` - - Sequence - - A list of email addresses to be notified when a run successfully completes. 
A run is considered to have completed successfully if it ends with a `TERMINATED` `life_cycle_state` and a `SUCCESS` result_state. If not specified on job creation, reset, or update, the list is empty, and notifications are not sent. - -::: - - -### jobs._name_.tasks.for_each_task - -**`Type: Map`** - -The task executes a nested task for every input provided when the `for_each_task` field is present. - - - -:::list-table - -- - Key - - Type - - Description - -- - `concurrency` - - Integer - - An optional maximum allowed number of concurrent runs of the task. Set this value if you want to be able to execute multiple runs of the task concurrently. - -- - `inputs` - - String - - Array for task to iterate on. This can be a JSON string or a reference to an array parameter. - -- - `task` - - Map - - Configuration for the task that will be run for each element in the array - -::: - - -### jobs._name_.tasks.health - -**`Type: Map`** - -An optional set of health rules that can be defined for this job. - - - -:::list-table - -- - Key - - Type - - Description - -- - `rules` - - Sequence - - See [\_](#jobsnametaskshealthrules). - -::: - - -### jobs._name_.tasks.health.rules - -**`Type: Sequence`** - - - - - -:::list-table - -- - Key - - Type - - Description - -- - `metric` - - String - - Specifies the health metric that is being evaluated for a particular health rule. * `RUN_DURATION_SECONDS`: Expected total time for a run in seconds. * `STREAMING_BACKLOG_BYTES`: An estimate of the maximum bytes of data waiting to be consumed across all streams. This metric is in Public Preview. * `STREAMING_BACKLOG_RECORDS`: An estimate of the maximum offset lag across all streams. This metric is in Public Preview. * `STREAMING_BACKLOG_SECONDS`: An estimate of the maximum consumer delay across all streams. This metric is in Public Preview. * `STREAMING_BACKLOG_FILES`: An estimate of the maximum number of outstanding files across all streams. This metric is in Public Preview. 
- -- - `op` - - String - - Specifies the operator used to compare the health metric value with the specified threshold. - -- - `value` - - Integer - - Specifies the threshold value that the health metric should obey to satisfy the health rule. - -::: - - -### jobs._name_.tasks.libraries - -**`Type: Sequence`** - -An optional list of libraries to be installed on the cluster. -The default value is an empty list. - - - -:::list-table - -- - Key - - Type - - Description - -- - `cran` - - Map - - Specification of a CRAN library to be installed as part of the library. See [\_](#jobsnametaskslibrariescran). - -- - `egg` - - String - - This field is deprecated - -- - `jar` - - String - - URI of the JAR library to install. Supported URIs include Workspace paths, Unity Catalog Volumes paths, and S3 URIs. For example: `{ "jar": "/Workspace/path/to/library.jar" }`, `{ "jar" : "/Volumes/path/to/library.jar" }` or `{ "jar": "s3://my-bucket/library.jar" }`. If S3 is used, please make sure the cluster has read access on the library. You may need to launch the cluster with an IAM role to access the S3 URI. - -- - `maven` - - Map - - Specification of a maven library to be installed. For example: `{ "coordinates": "org.jsoup:jsoup:1.7.2" }`. See [\_](#jobsnametaskslibrariesmaven). - -- - `pypi` - - Map - - Specification of a PyPi library to be installed. For example: `{ "package": "simplejson" }`. See [\_](#jobsnametaskslibrariespypi). - -- - `requirements` - - String - - URI of the requirements.txt file to install. Only Workspace paths and Unity Catalog Volumes paths are supported. For example: `{ "requirements": "/Workspace/path/to/requirements.txt" }` or `{ "requirements" : "/Volumes/path/to/requirements.txt" }` - -- - `whl` - - String - - URI of the wheel library to install. Supported URIs include Workspace paths, Unity Catalog Volumes paths, and S3 URIs. 
For example: `{ "whl": "/Workspace/path/to/library.whl" }`, `{ "whl" : "/Volumes/path/to/library.whl" }` or `{ "whl": "s3://my-bucket/library.whl" }`. If S3 is used, please make sure the cluster has read access on the library. You may need to launch the cluster with an IAM role to access the S3 URI. - -::: - - -### jobs._name_.tasks.libraries.cran - -**`Type: Map`** - -Specification of a CRAN library to be installed as part of the library - - - -:::list-table - -- - Key - - Type - - Description - -- - `package` - - String - - The name of the CRAN package to install. - -- - `repo` - - String - - The repository where the package can be found. If not specified, the default CRAN repo is used. - -::: - - -### jobs._name_.tasks.libraries.maven - -**`Type: Map`** - -Specification of a maven library to be installed. For example: -`{ "coordinates": "org.jsoup:jsoup:1.7.2" }` - - - -:::list-table - -- - Key - - Type - - Description - -- - `coordinates` - - String - - Gradle-style maven coordinates. For example: "org.jsoup:jsoup:1.7.2". - -- - `exclusions` - - Sequence - - List of dependencies to exclude. For example: `["slf4j:slf4j", "*:hadoop-client"]`. Maven dependency exclusions: https://maven.apache.org/guides/introduction/introduction-to-optional-and-excludes-dependencies.html. - -- - `repo` - - String - - Maven repo to install the Maven package from. If omitted, both Maven Central Repository and Spark Packages are searched. - -::: - - -### jobs._name_.tasks.libraries.pypi - -**`Type: Map`** - -Specification of a PyPi library to be installed. For example: -`{ "package": "simplejson" }` - - - -:::list-table - -- - Key - - Type - - Description - -- - `package` - - String - - The name of the pypi package to install. An optional exact version specification is also supported. Examples: "simplejson" and "simplejson==3.8.0". - -- - `repo` - - String - - The repository where the package can be found. If not specified, the default pip index is used.
- -::: - - -### jobs._name_.tasks.new_cluster - -**`Type: Map`** - -If new_cluster, a description of a new cluster that is created for each run. - - - -:::list-table - -- - Key - - Type - - Description - -- - `apply_policy_default_values` - - Boolean - - When set to true, fixed and default values from the policy will be used for fields that are omitted. When set to false, only fixed values from the policy will be applied. - -- - `autoscale` - - Map - - Parameters needed in order to automatically scale clusters up and down based on load. Note: autoscaling works best with DB runtime versions 3.0 or later. See [\_](#jobsnametasksnew_clusterautoscale). - -- - `autotermination_minutes` - - Integer - - Automatically terminates the cluster after it is inactive for this time in minutes. If not set, this cluster will not be automatically terminated. If specified, the threshold must be between 10 and 10000 minutes. Users can also set this value to 0 to explicitly disable automatic termination. - -- - `aws_attributes` - - Map - - Attributes related to clusters running on Amazon Web Services. If not specified at cluster creation, a set of default values will be used. See [\_](#jobsnametasksnew_clusteraws_attributes). - -- - `azure_attributes` - - Map - - Attributes related to clusters running on Microsoft Azure. If not specified at cluster creation, a set of default values will be used. See [\_](#jobsnametasksnew_clusterazure_attributes). - -- - `cluster_log_conf` - - Map - - The configuration for delivering spark logs to a long-term storage destination. Three kinds of destinations (DBFS, S3 and Unity Catalog volumes) are supported. Only one destination can be specified for one cluster. If the conf is given, the logs will be delivered to the destination every `5 mins`. The destination of driver logs is `$destination/$clusterId/driver`, while the destination of executor logs is `$destination/$clusterId/executor`. See [\_](#jobsnametasksnew_clustercluster_log_conf). 
- -- - `cluster_name` - - String - - Cluster name requested by the user. This doesn't have to be unique. If not specified at creation, the cluster name will be an empty string. For job clusters, the cluster name is automatically set based on the job and job run IDs. - -- - `custom_tags` - - Map - - Additional tags for cluster resources. Databricks will tag all cluster resources (e.g., AWS instances and EBS volumes) with these tags in addition to `default_tags`. Notes: - Currently, Databricks allows at most 45 custom tags - Clusters can only reuse cloud resources if the resources' tags are a subset of the cluster tags - -- - `data_security_mode` - - String - - Data security mode decides what data governance model to use when accessing data from a cluster. The following modes can only be used when `kind = CLASSIC_PREVIEW`. * `DATA_SECURITY_MODE_AUTO`: Databricks will choose the most appropriate access mode depending on your compute configuration. * `DATA_SECURITY_MODE_STANDARD`: Alias for `USER_ISOLATION`. * `DATA_SECURITY_MODE_DEDICATED`: Alias for `SINGLE_USER`. The following modes can be used regardless of `kind`. * `NONE`: No security isolation for multiple users sharing the cluster. Data governance features are not available in this mode. * `SINGLE_USER`: A secure cluster that can only be exclusively used by a single user specified in `single_user_name`. Most programming languages, cluster features and data governance features are available in this mode. * `USER_ISOLATION`: A secure cluster that can be shared by multiple users. Cluster users are fully isolated so that they cannot see each other's data and credentials. Most data governance features are supported in this mode. But programming languages and cluster features might be limited. The following modes are deprecated starting with Databricks Runtime 15.0 and will be removed for future Databricks Runtime versions: * `LEGACY_TABLE_ACL`: This mode is for users migrating from legacy Table ACL clusters. 
* `LEGACY_PASSTHROUGH`: This mode is for users migrating from legacy Passthrough on high concurrency clusters. * `LEGACY_SINGLE_USER`: This mode is for users migrating from legacy Passthrough on standard clusters. * `LEGACY_SINGLE_USER_STANDARD`: This mode provides a way that doesn’t have UC nor passthrough enabled. - -- - `docker_image` - - Map - - See [\_](#jobsnametasksnew_clusterdocker_image). - -- - `driver_instance_pool_id` - - String - - The optional ID of the instance pool for the driver of the cluster belongs. The pool cluster uses the instance pool with id (instance_pool_id) if the driver pool is not assigned. - -- - `driver_node_type_id` - - String - - The node type of the Spark driver. Note that this field is optional; if unset, the driver node type will be set as the same value as `node_type_id` defined above. This field, along with node_type_id, should not be set if virtual_cluster_size is set. If both driver_node_type_id, node_type_id, and virtual_cluster_size are specified, driver_node_type_id and node_type_id take precedence. - -- - `enable_elastic_disk` - - Boolean - - Autoscaling Local Storage: when enabled, this cluster will dynamically acquire additional disk space when its Spark workers are running low on disk space. This feature requires specific AWS permissions to function correctly - refer to the User Guide for more details. - -- - `enable_local_disk_encryption` - - Boolean - - Whether to enable LUKS on cluster VMs' local disks - -- - `gcp_attributes` - - Map - - Attributes related to clusters running on Google Cloud Platform. If not specified at cluster creation, a set of default values will be used. See [\_](#jobsnametasksnew_clustergcp_attributes). - -- - `init_scripts` - - Sequence - - The configuration for storing init scripts. Any number of destinations can be specified. The scripts are executed sequentially in the order provided. If `cluster_log_conf` is specified, init script logs are sent to `//init_scripts`. 
See [\_](#jobsnametasksnew_clusterinit_scripts). - -- - `instance_pool_id` - - String - - The optional ID of the instance pool to which the cluster belongs. - -- - `is_single_node` - - Boolean - - This field can only be used when `kind = CLASSIC_PREVIEW`. When set to true, Databricks will automatically set single node related `custom_tags`, `spark_conf`, and `num_workers` - -- - `kind` - - String - - - -- - `node_type_id` - - String - - This field encodes, through a single value, the resources available to each of the Spark nodes in this cluster. For example, the Spark nodes can be provisioned and optimized for memory or compute intensive workloads. A list of available node types can be retrieved by using the :method:clusters/listNodeTypes API call. - -- - `num_workers` - - Integer - - Number of worker nodes that this cluster should have. A cluster has one Spark Driver and `num_workers` Executors for a total of `num_workers` + 1 Spark nodes. Note: When reading the properties of a cluster, this field reflects the desired number of workers rather than the actual current number of workers. For instance, if a cluster is resized from 5 to 10 workers, this field will immediately be updated to reflect the target size of 10 workers, whereas the workers listed in `spark_info` will gradually increase from 5 to 10 as the new nodes are provisioned. - -- - `policy_id` - - String - - The ID of the cluster policy used to create the cluster if applicable. - -- - `remote_disk_throughput` - - Integer - - If set, what the configurable throughput (in Mb/s) for the remote disk is. Currently only supported for GCP HYPERDISK_BALANCED disks. - -- - `runtime_engine` - - String - - - -- - `single_user_name` - - String - - Single user name if data_security_mode is `SINGLE_USER` - -- - `spark_conf` - - Map - - An object containing a set of optional, user-specified Spark configuration key-value pairs. 
Users can also pass in a string of extra JVM options to the driver and the executors via `spark.driver.extraJavaOptions` and `spark.executor.extraJavaOptions` respectively. - -- - `spark_env_vars` - - Map - - An object containing a set of optional, user-specified environment variable key-value pairs. Please note that key-value pair of the form (X,Y) will be exported as is (i.e., `export X='Y'`) while launching the driver and workers. In order to specify an additional set of `SPARK_DAEMON_JAVA_OPTS`, we recommend appending them to `$SPARK_DAEMON_JAVA_OPTS` as shown in the example below. This ensures that all default databricks managed environmental variables are included as well. Example Spark environment variables: `{"SPARK_WORKER_MEMORY": "28000m", "SPARK_LOCAL_DIRS": "/local_disk0"}` or `{"SPARK_DAEMON_JAVA_OPTS": "$SPARK_DAEMON_JAVA_OPTS -Dspark.shuffle.service.enabled=true"}` - -- - `spark_version` - - String - - The Spark version of the cluster, e.g. `3.3.x-scala2.11`. A list of available Spark versions can be retrieved by using the :method:clusters/sparkVersions API call. - -- - `ssh_public_keys` - - Sequence - - SSH public key contents that will be added to each Spark node in this cluster. The corresponding private keys can be used to login with the user name `ubuntu` on port `2200`. Up to 10 keys can be specified. - -- - `total_initial_remote_disk_size` - - Integer - - If set, what the total initial volume size (in GB) of the remote disks should be. Currently only supported for GCP HYPERDISK_BALANCED disks. - -- - `use_ml_runtime` - - Boolean - - This field can only be used when `kind = CLASSIC_PREVIEW`. `effective_spark_version` is determined by `spark_version` (DBR release), this field `use_ml_runtime`, and whether `node_type_id` is gpu node or not. - -- - `workload_type` - - Map - - Cluster Attributes showing for clusters workload types. See [\_](#jobsnametasksnew_clusterworkload_type). 
- -::: - - -### jobs._name_.tasks.new_cluster.autoscale - -**`Type: Map`** - -Parameters needed in order to automatically scale clusters up and down based on load. -Note: autoscaling works best with DB runtime versions 3.0 or later. - - - -:::list-table - -- - Key - - Type - - Description - -- - `max_workers` - - Integer - - The maximum number of workers to which the cluster can scale up when overloaded. Note that `max_workers` must be strictly greater than `min_workers`. - -- - `min_workers` - - Integer - - The minimum number of workers to which the cluster can scale down when underutilized. It is also the initial number of workers the cluster will have after creation. - -::: - - -### jobs._name_.tasks.new_cluster.aws_attributes - -**`Type: Map`** - -Attributes related to clusters running on Amazon Web Services. -If not specified at cluster creation, a set of default values will be used. - - - -:::list-table - -- - Key - - Type - - Description - -- - `availability` - - String - - Availability type used for all subsequent nodes past the `first_on_demand` ones. Note: If `first_on_demand` is zero, this availability type will be used for the entire cluster. - -- - `ebs_volume_count` - - Integer - - The number of volumes launched for each instance. Users can choose up to 10 volumes. This feature is only enabled for supported node types. Legacy node types cannot specify custom EBS volumes. For node types with no instance store, at least one EBS volume needs to be specified; otherwise, cluster creation will fail. These EBS volumes will be mounted at `/ebs0`, `/ebs1`, and etc. Instance store volumes will be mounted at `/local_disk0`, `/local_disk1`, and etc. If EBS volumes are attached, Databricks will configure Spark to use only the EBS volumes for scratch storage because heterogenously sized scratch devices can lead to inefficient disk utilization. If no EBS volumes are attached, Databricks will configure Spark to use instance store volumes. 
Please note that if EBS volumes are specified, then the Spark configuration `spark.local.dir` will be overridden. - -- - `ebs_volume_iops` - - Integer - - If using gp3 volumes, what IOPS to use for the disk. If this is not set, the maximum performance of a gp2 volume with the same volume size will be used. - -- - `ebs_volume_size` - - Integer - - The size of each EBS volume (in GiB) launched for each instance. For general purpose SSD, this value must be within the range 100 - 4096. For throughput optimized HDD, this value must be within the range 500 - 4096. - -- - `ebs_volume_throughput` - - Integer - - If using gp3 volumes, what throughput to use for the disk. If this is not set, the maximum performance of a gp2 volume with the same volume size will be used. - -- - `ebs_volume_type` - - String - - All EBS volume types that Databricks supports. See https://aws.amazon.com/ebs/details/ for details. - -- - `first_on_demand` - - Integer - - The first `first_on_demand` nodes of the cluster will be placed on on-demand instances. If this value is greater than 0, the cluster driver node in particular will be placed on an on-demand instance. If this value is greater than or equal to the current cluster size, all nodes will be placed on on-demand instances. If this value is less than the current cluster size, `first_on_demand` nodes will be placed on on-demand instances and the remainder will be placed on `availability` instances. Note that this value does not affect cluster size and cannot currently be mutated over the lifetime of a cluster. - -- - `instance_profile_arn` - - String - - Nodes for this cluster will only be placed on AWS instances with this instance profile. If omitted, nodes will be placed on instances without an IAM instance profile. The instance profile must have previously been added to the Databricks environment by an account administrator. This feature may only be available to certain customer plans.
- -- - `spot_bid_price_percent` - - Integer - - The bid price for AWS spot instances, as a percentage of the corresponding instance type's on-demand price. For example, if this field is set to 50, and the cluster needs a new `r3.xlarge` spot instance, then the bid price is half of the price of on-demand `r3.xlarge` instances. Similarly, if this field is set to 200, the bid price is twice the price of on-demand `r3.xlarge` instances. If not specified, the default value is 100. When spot instances are requested for this cluster, only spot instances whose bid price percentage matches this field will be considered. Note that, for safety, we enforce this field to be no more than 10000. - -- - `zone_id` - - String - - Identifier for the availability zone/datacenter in which the cluster resides. This string will be of a form like "us-west-2a". The provided availability zone must be in the same region as the Databricks deployment. For example, "us-west-2a" is not a valid zone id if the Databricks deployment resides in the "us-east-1" region. This is an optional field at cluster creation, and if not specified, a default zone will be used. If the zone specified is "auto", will try to place cluster in a zone with high availability, and will retry placement in a different AZ if there is not enough capacity. The list of available zones as well as the default value can be found by using the `List Zones` method. - -::: - - -### jobs._name_.tasks.new_cluster.azure_attributes - -**`Type: Map`** - -Attributes related to clusters running on Microsoft Azure. -If not specified at cluster creation, a set of default values will be used. - - - -:::list-table - -- - Key - - Type - - Description - -- - `availability` - - String - - Availability type used for all subsequent nodes past the `first_on_demand` ones. Note: If `first_on_demand` is zero, this availability type will be used for the entire cluster. 
- -- - `first_on_demand` - - Integer - - The first `first_on_demand` nodes of the cluster will be placed on on-demand instances. This value should be greater than 0, to make sure the cluster driver node is placed on an on-demand instance. If this value is greater than or equal to the current cluster size, all nodes will be placed on on-demand instances. If this value is less than the current cluster size, `first_on_demand` nodes will be placed on on-demand instances and the remainder will be placed on `availability` instances. Note that this value does not affect cluster size and cannot currently be mutated over the lifetime of a cluster. - -- - `log_analytics_info` - - Map - - Defines values necessary to configure and run Azure Log Analytics agent. See [\_](#jobsnametasksnew_clusterazure_attributeslog_analytics_info). - -- - `spot_bid_max_price` - - Any - - The max bid price to be used for Azure spot instances. The Max price for the bid cannot be higher than the on-demand price of the instance. If not specified, the default value is -1, which specifies that the instance cannot be evicted on the basis of price, and only on the basis of availability. Further, the value should be > 0 or -1. - -::: - - -### jobs._name_.tasks.new_cluster.azure_attributes.log_analytics_info - -**`Type: Map`** - -Defines values necessary to configure and run Azure Log Analytics agent - - - -:::list-table - -- - Key - - Type - - Description - -- - `log_analytics_primary_key` - - String - - The primary key for the Azure Log Analytics agent configuration - -- - `log_analytics_workspace_id` - - String - - The workspace ID for the Azure Log Analytics agent configuration - -::: - - -### jobs._name_.tasks.new_cluster.cluster_log_conf - -**`Type: Map`** - -The configuration for delivering spark logs to a long-term storage destination. -Three kinds of destinations (DBFS, S3 and Unity Catalog volumes) are supported. Only one destination can be specified -for one cluster.
If the conf is given, the logs will be delivered to the destination every -`5 mins`. The destination of driver logs is `$destination/$clusterId/driver`, while -the destination of executor logs is `$destination/$clusterId/executor`. - - - -:::list-table - -- - Key - - Type - - Description - -- - `dbfs` - - Map - - destination needs to be provided. e.g. `{ "dbfs" : { "destination" : "dbfs:/home/cluster_log" } }`. See [\_](#jobsnametasksnew_clustercluster_log_confdbfs). - -- - `s3` - - Map - - destination and either the region or endpoint need to be provided. e.g. `{ "s3": { "destination" : "s3://cluster_log_bucket/prefix", "region" : "us-west-2" } }` Cluster iam role is used to access s3, please make sure the cluster iam role in `instance_profile_arn` has permission to write data to the s3 destination. See [\_](#jobsnametasksnew_clustercluster_log_confs3). - -- - `volumes` - - Map - - destination needs to be provided, e.g. `{ "volumes": { "destination": "/Volumes/catalog/schema/volume/cluster_log" } }`. See [\_](#jobsnametasksnew_clustercluster_log_confvolumes). - -::: - - -### jobs._name_.tasks.new_cluster.cluster_log_conf.dbfs - -**`Type: Map`** - -destination needs to be provided. e.g. -`{ "dbfs" : { "destination" : "dbfs:/home/cluster_log" } }` - - - -:::list-table - -- - Key - - Type - - Description - -- - `destination` - - String - - dbfs destination, e.g. `dbfs:/my/path` - -::: - - -### jobs._name_.tasks.new_cluster.cluster_log_conf.s3 - -**`Type: Map`** - -destination and either the region or endpoint need to be provided. e.g. -`{ "s3": { "destination" : "s3://cluster_log_bucket/prefix", "region" : "us-west-2" } }` -Cluster iam role is used to access s3, please make sure the cluster iam role in -`instance_profile_arn` has permission to write data to the s3 destination. - - - -:::list-table - -- - Key - - Type - - Description - -- - `canned_acl` - - String - - (Optional) Set canned access control list for the logs, e.g. `bucket-owner-full-control`. 
If `canned_acl` is set, please make sure the cluster iam role has `s3:PutObjectAcl` permission on the destination bucket and prefix. The full list of possible canned acl can be found at http://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl. Please also note that by default only the object owner gets full controls. If you are using cross account role for writing data, you may want to set `bucket-owner-full-control` to make bucket owner able to read the logs. - -- - `destination` - - String - - S3 destination, e.g. `s3://my-bucket/some-prefix` Note that logs will be delivered using cluster iam role, please make sure you set cluster iam role and the role has write access to the destination. Please also note that you cannot use AWS keys to deliver logs. - -- - `enable_encryption` - - Boolean - - (Optional) Flag to enable server side encryption, `false` by default. - -- - `encryption_type` - - String - - (Optional) The encryption type, it could be `sse-s3` or `sse-kms`. It will be used only when encryption is enabled and the default type is `sse-s3`. - -- - `endpoint` - - String - - S3 endpoint, e.g. `https://s3-us-west-2.amazonaws.com`. Either region or endpoint needs to be set. If both are set, endpoint will be used. - -- - `kms_key` - - String - - (Optional) Kms key which will be used if encryption is enabled and encryption type is set to `sse-kms`. - -- - `region` - - String - - S3 region, e.g. `us-west-2`. Either region or endpoint needs to be set. If both are set, endpoint will be used. - -::: - - -### jobs._name_.tasks.new_cluster.cluster_log_conf.volumes - -**`Type: Map`** - -destination needs to be provided, e.g. -`{ "volumes": { "destination": "/Volumes/catalog/schema/volume/cluster_log" } }` - - - -:::list-table - -- - Key - - Type - - Description - -- - `destination` - - String - - UC Volumes destination, e.g.
`/Volumes/catalog/schema/vol1/init-scripts/setup-datadog.sh` or `dbfs:/Volumes/catalog/schema/vol1/init-scripts/setup-datadog.sh` - -::: - - -### jobs._name_.tasks.new_cluster.docker_image - -**`Type: Map`** - - - - - -:::list-table - -- - Key - - Type - - Description - -- - `basic_auth` - - Map - - See [\_](#jobsnametasksnew_clusterdocker_imagebasic_auth). - -- - `url` - - String - - URL of the docker image. - -::: - - -### jobs._name_.tasks.new_cluster.docker_image.basic_auth - -**`Type: Map`** - - - - - -:::list-table - -- - Key - - Type - - Description - -- - `password` - - String - - Password of the user - -- - `username` - - String - - Name of the user - -::: - - -### jobs._name_.tasks.new_cluster.gcp_attributes - -**`Type: Map`** - -Attributes related to clusters running on Google Cloud Platform. -If not specified at cluster creation, a set of default values will be used. - - - -:::list-table - -- - Key - - Type - - Description - -- - `availability` - - String - - This field determines whether the instance pool will contain preemptible VMs, on-demand VMs, or preemptible VMs with a fallback to on-demand VMs if the former is unavailable. - -- - `boot_disk_size` - - Integer - - Boot disk size in GB - -- - `first_on_demand` - - Integer - - The first `first_on_demand` nodes of the cluster will be placed on on-demand instances. This value should be greater than 0, to make sure the cluster driver node is placed on an on-demand instance. If this value is greater than or equal to the current cluster size, all nodes will be placed on on-demand instances. If this value is less than the current cluster size, `first_on_demand` nodes will be placed on on-demand instances and the remainder will be placed on `availability` instances. Note that this value does not affect cluster size and cannot currently be mutated over the lifetime of a cluster. 
- -- - `google_service_account` - - String - - If provided, the cluster will impersonate the google service account when accessing gcloud services (like GCS). The google service account must have previously been added to the Databricks environment by an account administrator. - -- - `local_ssd_count` - - Integer - - If provided, each node (workers and driver) in the cluster will have this number of local SSDs attached. Each local SSD is 375GB in size. Refer to [GCP documentation](https://cloud.google.com/compute/docs/disks/local-ssd#choose_number_local_ssds) for the supported number of local SSDs for each instance type. - -- - `use_preemptible_executors` - - Boolean - - This field is deprecated - -- - `zone_id` - - String - - Identifier for the availability zone in which the cluster resides. This can be one of the following: - "HA" => High availability, spread nodes across availability zones for a Databricks deployment region [default]. - "AUTO" => Databricks picks an availability zone to schedule the cluster on. - A GCP availability zone => Pick One of the available zones for (machine type + region) from https://cloud.google.com/compute/docs/regions-zones. - -::: - - -### jobs._name_.tasks.new_cluster.init_scripts - -**`Type: Sequence`** - -The configuration for storing init scripts. Any number of destinations can be specified. -The scripts are executed sequentially in the order provided. -If `cluster_log_conf` is specified, init script logs are sent to `//init_scripts`. - - - -:::list-table - -- - Key - - Type - - Description - -- - `abfss` - - Map - - Contains the Azure Data Lake Storage destination path. See [\_](#jobsnametasksnew_clusterinit_scriptsabfss). - -- - `dbfs` - - Map - - This field is deprecated - -- - `file` - - Map - - destination needs to be provided, e.g. `{ "file": { "destination": "file:/my/local/file.sh" } }`. See [\_](#jobsnametasksnew_clusterinit_scriptsfile). - -- - `gcs` - - Map - - destination needs to be provided, e.g. 
`{ "gcs": { "destination": "gs://my-bucket/file.sh" } }`. See [\_](#jobsnametasksnew_clusterinit_scriptsgcs). - -- - `s3` - - Map - - destination and either the region or endpoint need to be provided. e.g. `{ \"s3\": { \"destination\": \"s3://cluster_log_bucket/prefix\", \"region\": \"us-west-2\" } }` Cluster iam role is used to access s3, please make sure the cluster iam role in `instance_profile_arn` has permission to write data to the s3 destination. See [\_](#jobsnametasksnew_clusterinit_scriptss3). - -- - `volumes` - - Map - - destination needs to be provided. e.g. `{ \"volumes\" : { \"destination\" : \"/Volumes/my-init.sh\" } }`. See [\_](#jobsnametasksnew_clusterinit_scriptsvolumes). - -- - `workspace` - - Map - - destination needs to be provided, e.g. `{ "workspace": { "destination": "/cluster-init-scripts/setup-datadog.sh" } }`. See [\_](#jobsnametasksnew_clusterinit_scriptsworkspace). - -::: - - -### jobs._name_.tasks.new_cluster.init_scripts.abfss - -**`Type: Map`** - -Contains the Azure Data Lake Storage destination path - - - -:::list-table - -- - Key - - Type - - Description - -- - `destination` - - String - - abfss destination, e.g. `abfss://@.dfs.core.windows.net/`. - -::: - - -### jobs._name_.tasks.new_cluster.init_scripts.file - -**`Type: Map`** - -destination needs to be provided, e.g. -`{ "file": { "destination": "file:/my/local/file.sh" } }` - - - -:::list-table - -- - Key - - Type - - Description - -- - `destination` - - String - - local file destination, e.g. `file:/my/local/file.sh` - -::: - - -### jobs._name_.tasks.new_cluster.init_scripts.gcs - -**`Type: Map`** - -destination needs to be provided, e.g. -`{ "gcs": { "destination": "gs://my-bucket/file.sh" } }` - - - -:::list-table - -- - Key - - Type - - Description - -- - `destination` - - String - - GCS destination/URI, e.g. 
`gs://my-bucket/some-prefix` - -::: - - -### jobs._name_.tasks.new_cluster.init_scripts.s3 - -**`Type: Map`** - -destination and either the region or endpoint need to be provided. e.g. -`{ \"s3\": { \"destination\": \"s3://cluster_log_bucket/prefix\", \"region\": \"us-west-2\" } }` -Cluster iam role is used to access s3, please make sure the cluster iam role in -`instance_profile_arn` has permission to write data to the s3 destination. - - - -:::list-table - -- - Key - - Type - - Description - -- - `canned_acl` - - String - - (Optional) Set canned access control list for the logs, e.g. `bucket-owner-full-control`. If `canned_acl` is set, please make sure the cluster iam role has `s3:PutObjectAcl` permission on the destination bucket and prefix. The full list of possible canned acl can be found at http://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl. Please also note that by default only the object owner gets full controls. If you are using cross account role for writing data, you may want to set `bucket-owner-full-control` to make bucket owner able to read the logs. - -- - `destination` - - String - - S3 destination, e.g. `s3://my-bucket/some-prefix` Note that logs will be delivered using cluster iam role, please make sure you set cluster iam role and the role has write access to the destination. Please also note that you cannot use AWS keys to deliver logs. - -- - `enable_encryption` - - Boolean - - (Optional) Flag to enable server side encryption, `false` by default. - -- - `encryption_type` - - String - - (Optional) The encryption type, it could be `sse-s3` or `sse-kms`. It will be used only when encryption is enabled and the default type is `sse-s3`. - -- - `endpoint` - - String - - S3 endpoint, e.g. `https://s3-us-west-2.amazonaws.com`. Either region or endpoint needs to be set. If both are set, endpoint will be used.
- -- - `kms_key` - - String - - (Optional) Kms key which will be used if encryption is enabled and encryption type is set to `sse-kms`. - -- - `region` - - String - - S3 region, e.g. `us-west-2`. Either region or endpoint needs to be set. If both are set, endpoint will be used. - -::: - - -### jobs._name_.tasks.new_cluster.init_scripts.volumes - -**`Type: Map`** - -destination needs to be provided. e.g. -`{ \"volumes\" : { \"destination\" : \"/Volumes/my-init.sh\" } }` - - - -:::list-table - -- - Key - - Type - - Description - -- - `destination` - - String - - UC Volumes destination, e.g. `/Volumes/catalog/schema/vol1/init-scripts/setup-datadog.sh` or `dbfs:/Volumes/catalog/schema/vol1/init-scripts/setup-datadog.sh` - -::: - - -### jobs._name_.tasks.new_cluster.init_scripts.workspace - -**`Type: Map`** - -destination needs to be provided, e.g. -`{ "workspace": { "destination": "/cluster-init-scripts/setup-datadog.sh" } }` - - - -:::list-table - -- - Key - - Type - - Description - -- - `destination` - - String - - wsfs destination, e.g. `workspace:/cluster-init-scripts/setup-datadog.sh` - -::: - - -### jobs._name_.tasks.new_cluster.workload_type - -**`Type: Map`** - -Cluster Attributes showing for clusters workload types. - - - -:::list-table - -- - Key - - Type - - Description - -- - `clients` - - Map - - defined what type of clients can use the cluster. E.g. Notebooks, Jobs. See [\_](#jobsnametasksnew_clusterworkload_typeclients). - -::: - - -### jobs._name_.tasks.new_cluster.workload_type.clients - -**`Type: Map`** - -defined what type of clients can use the cluster. E.g. Notebooks, Jobs - - - -:::list-table - -- - Key - - Type - - Description - -- - `jobs` - - Boolean - - With jobs set, the cluster can be used for jobs - -- - `notebooks` - - Boolean - - With notebooks set, this cluster can be used for notebooks - -::: - - -### jobs._name_.tasks.notebook_task - -**`Type: Map`** - -The task runs a notebook when the `notebook_task` field is present. 
- - - -:::list-table - -- - Key - - Type - - Description - -- - `base_parameters` - - Map - - Base parameters to be used for each run of this job. If the run is initiated by a call to :method:jobs/run Now with parameters specified, the two parameters maps are merged. If the same key is specified in `base_parameters` and in `run-now`, the value from `run-now` is used. Use [Task parameter variables](https://docs.databricks.com/jobs.html#parameter-variables) to set parameters containing information about job runs. If the notebook takes a parameter that is not specified in the job’s `base_parameters` or the `run-now` override parameters, the default value from the notebook is used. Retrieve these parameters in a notebook using [dbutils.widgets.get](https://docs.databricks.com/dev-tools/databricks-utils.html#dbutils-widgets). The JSON representation of this field cannot exceed 1MB. - -- - `notebook_path` - - String - - The path of the notebook to be run in the Databricks workspace or remote repository. For notebooks stored in the Databricks workspace, the path must be absolute and begin with a slash. For notebooks stored in a remote repository, the path must be relative. This field is required. - -- - `source` - - String - - Optional location type of the notebook. When set to `WORKSPACE`, the notebook will be retrieved from the local Databricks workspace. When set to `GIT`, the notebook will be retrieved from a Git repository defined in `git_source`. If the value is empty, the task will use `GIT` if `git_source` is defined and `WORKSPACE` otherwise. * `WORKSPACE`: Notebook is located in Databricks workspace. * `GIT`: Notebook is located in cloud Git provider. - -- - `warehouse_id` - - String - - Optional `warehouse_id` to run the notebook on a SQL warehouse. Classic SQL warehouses are NOT supported, please use serverless or pro SQL warehouses. Note that SQL warehouses only support SQL cells; if the notebook contains non-SQL cells, the run will fail. 
- -::: - - -### jobs._name_.tasks.notification_settings - -**`Type: Map`** - -Optional notification settings that are used when sending notifications to each of the `email_notifications` and `webhook_notifications` for this task. - - - -:::list-table - -- - Key - - Type - - Description - -- - `alert_on_last_attempt` - - Boolean - - If true, do not send notifications to recipients specified in `on_start` for the retried runs and do not send notifications to recipients specified in `on_failure` until the last retry of the run. - -- - `no_alert_for_canceled_runs` - - Boolean - - If true, do not send notifications to recipients specified in `on_failure` if the run is canceled. - -- - `no_alert_for_skipped_runs` - - Boolean - - If true, do not send notifications to recipients specified in `on_failure` if the run is skipped. - -::: - - -### jobs._name_.tasks.pipeline_task - -**`Type: Map`** - -The task triggers a pipeline update when the `pipeline_task` field is present. Only pipelines configured to use triggered mode are supported. - - - -:::list-table - -- - Key - - Type - - Description - -- - `full_refresh` - - Boolean - - If true, triggers a full refresh on the delta live table. - -- - `pipeline_id` - - String - - The full name of the pipeline task to execute. - -::: - - -### jobs._name_.tasks.power_bi_task - -**`Type: Map`** - -The task triggers a Power BI semantic model update when the `power_bi_task` field is present. - - - -:::list-table - -- - Key - - Type - - Description - -- - `connection_resource_name` - - String - - The resource name of the UC connection to authenticate from Databricks to Power BI - -- - `power_bi_model` - - Map - - The semantic model to update. See [\_](#jobsnametaskspower_bi_taskpower_bi_model). - -- - `refresh_after_update` - - Boolean - - Whether the model should be refreshed after the update - -- - `tables` - - Sequence - - The tables to be exported to Power BI. See [\_](#jobsnametaskspower_bi_tasktables).
- -- - `warehouse_id` - - String - - The SQL warehouse ID to use as the Power BI data source - -::: - - -### jobs._name_.tasks.power_bi_task.power_bi_model - -**`Type: Map`** - -The semantic model to update - - - -:::list-table - -- - Key - - Type - - Description - -- - `authentication_method` - - String - - How the published Power BI model authenticates to Databricks - -- - `model_name` - - String - - The name of the Power BI model - -- - `overwrite_existing` - - Boolean - - Whether to overwrite existing Power BI models - -- - `storage_mode` - - String - - The default storage mode of the Power BI model - -- - `workspace_name` - - String - - The name of the Power BI workspace of the model - -::: - - -### jobs._name_.tasks.power_bi_task.tables - -**`Type: Sequence`** - -The tables to be exported to Power BI - - - -:::list-table - -- - Key - - Type - - Description - -- - `catalog` - - String - - The catalog name in Databricks - -- - `name` - - String - - The table name in Databricks - -- - `schema` - - String - - The schema name in Databricks - -- - `storage_mode` - - String - - The Power BI storage mode of the table - -::: - - -### jobs._name_.tasks.python_wheel_task - -**`Type: Map`** - -The task runs a Python wheel when the `python_wheel_task` field is present. - - - -:::list-table - -- - Key - - Type - - Description - -- - `entry_point` - - String - - Named entry point to use, if it does not exist in the metadata of the package it executes the function from the package directly using `$packageName.$entryPoint()` - -- - `named_parameters` - - Map - - Command-line parameters passed to Python wheel task in the form of `["--name=task", "--data=dbfs:/path/to/data.json"]`. Leave it empty if `parameters` is not null. - -- - `package_name` - - String - - Name of the package to execute - -- - `parameters` - - Sequence - - Command-line parameters passed to Python wheel task. Leave it empty if `named_parameters` is not null. 
- -::: - - -### jobs._name_.tasks.run_job_task - -**`Type: Map`** - -The task triggers another job when the `run_job_task` field is present. - - - -:::list-table - -- - Key - - Type - - Description - -- - `job_id` - - Integer - - ID of the job to trigger. - -- - `job_parameters` - - Map - - Job-level parameters used to trigger the job. - -- - `pipeline_params` - - Map - - Controls whether the pipeline should perform a full refresh. See [\_](#jobsnametasksrun_job_taskpipeline_params). - -::: - - -### jobs._name_.tasks.run_job_task.pipeline_params - -**`Type: Map`** - -Controls whether the pipeline should perform a full refresh - - - -:::list-table - -- - Key - - Type - - Description - -- - `full_refresh` - - Boolean - - If true, triggers a full refresh on the delta live table. - -::: - - -### jobs._name_.tasks.spark_jar_task - -**`Type: Map`** - -The task runs a JAR when the `spark_jar_task` field is present. - - - -:::list-table - -- - Key - - Type - - Description - -- - `jar_uri` - - String - - This field is deprecated - -- - `main_class_name` - - String - - The full name of the class containing the main method to be executed. This class must be contained in a JAR provided as a library. The code must use `SparkContext.getOrCreate` to obtain a Spark context; otherwise, runs of the job fail. - -- - `parameters` - - Sequence - - Parameters passed to the main method. Use [Task parameter variables](https://docs.databricks.com/jobs.html#parameter-variables) to set parameters containing information about job runs. - -- - `run_as_repl` - - Boolean - - This field is deprecated - -::: - - -### jobs._name_.tasks.spark_python_task - -**`Type: Map`** - -The task runs a Python file when the `spark_python_task` field is present. - - - -:::list-table - -- - Key - - Type - - Description - -- - `parameters` - - Sequence - - Command line parameters passed to the Python file. 
Use [Task parameter variables](https://docs.databricks.com/jobs.html#parameter-variables) to set parameters containing information about job runs. - -- - `python_file` - - String - - The Python file to be executed. Cloud file URIs (such as dbfs:/, s3:/, adls:/, gcs:/) and workspace paths are supported. For python files stored in the Databricks workspace, the path must be absolute and begin with `/`. For files stored in a remote repository, the path must be relative. This field is required. - -- - `source` - - String - - Optional location type of the Python file. When set to `WORKSPACE` or not specified, the file will be retrieved from the local Databricks workspace or cloud location (if the `python_file` has a URI format). When set to `GIT`, the Python file will be retrieved from a Git repository defined in `git_source`. * `WORKSPACE`: The Python file is located in a Databricks workspace or at a cloud filesystem URI. * `GIT`: The Python file is located in a remote Git repository. - -::: - - -### jobs._name_.tasks.spark_submit_task - -**`Type: Map`** - -(Legacy) The task runs the spark-submit script when the `spark_submit_task` field is present. This task can run only on new clusters and is not compatible with serverless compute. - -In the `new_cluster` specification, `libraries` and `spark_conf` are not supported. Instead, use `--jars` and `--py-files` to add Java and Python libraries and `--conf` to set the Spark configurations. - -`master`, `deploy-mode`, and `executor-cores` are automatically configured by Databricks; you _cannot_ specify them in parameters. - -By default, the Spark submit job uses all available memory (excluding reserved memory for Databricks services). You can set `--driver-memory`, and `--executor-memory` to a smaller value to leave some room for off-heap usage. - -The `--jars`, `--py-files`, `--files` arguments support DBFS and S3 paths. 
- - - -:::list-table - -- - Key - - Type - - Description - -- - `parameters` - - Sequence - - Command-line parameters passed to spark submit. Use [Task parameter variables](https://docs.databricks.com/jobs.html#parameter-variables) to set parameters containing information about job runs. - -::: - - -### jobs._name_.tasks.sql_task - -**`Type: Map`** - -The task runs a SQL query or file, or it refreshes a SQL alert or a legacy SQL dashboard when the `sql_task` field is present. - - - -:::list-table - -- - Key - - Type - - Description - -- - `alert` - - Map - - If alert, indicates that this job must refresh a SQL alert. See [\_](#jobsnametaskssql_taskalert). - -- - `dashboard` - - Map - - If dashboard, indicates that this job must refresh a SQL dashboard. See [\_](#jobsnametaskssql_taskdashboard). - -- - `file` - - Map - - If file, indicates that this job runs a SQL file in a remote Git repository. See [\_](#jobsnametaskssql_taskfile). - -- - `parameters` - - Map - - Parameters to be used for each run of this job. The SQL alert task does not support custom parameters. - -- - `query` - - Map - - If query, indicates that this job must execute a SQL query. See [\_](#jobsnametaskssql_taskquery). - -- - `warehouse_id` - - String - - The canonical identifier of the SQL warehouse. Recommended to use with serverless or pro SQL warehouses. Classic SQL warehouses are only supported for SQL alert, dashboard and query tasks and are limited to scheduled single-task jobs. - -::: - - -### jobs._name_.tasks.sql_task.alert - -**`Type: Map`** - -If alert, indicates that this job must refresh a SQL alert. - - - -:::list-table - -- - Key - - Type - - Description - -- - `alert_id` - - String - - The canonical identifier of the SQL alert. - -- - `pause_subscriptions` - - Boolean - - If true, the alert notifications are not sent to subscribers. - -- - `subscriptions` - - Sequence - - If specified, alert notifications are sent to subscribers. 
See [\_](#jobsnametaskssql_taskalertsubscriptions). - -::: - - -### jobs._name_.tasks.sql_task.alert.subscriptions - -**`Type: Sequence`** - -If specified, alert notifications are sent to subscribers. - - - -:::list-table - -- - Key - - Type - - Description - -- - `destination_id` - - String - - The canonical identifier of the destination to receive email notification. This parameter is mutually exclusive with user_name. You cannot set both destination_id and user_name for subscription notifications. - -- - `user_name` - - String - - The user name to receive the subscription email. This parameter is mutually exclusive with destination_id. You cannot set both destination_id and user_name for subscription notifications. - -::: - - -### jobs._name_.tasks.sql_task.dashboard - -**`Type: Map`** - -If dashboard, indicates that this job must refresh a SQL dashboard. - - - -:::list-table - -- - Key - - Type - - Description - -- - `custom_subject` - - String - - Subject of the email sent to subscribers of this task. - -- - `dashboard_id` - - String - - The canonical identifier of the SQL dashboard. - -- - `pause_subscriptions` - - Boolean - - If true, the dashboard snapshot is not taken, and emails are not sent to subscribers. - -- - `subscriptions` - - Sequence - - If specified, dashboard snapshots are sent to subscriptions. See [\_](#jobsnametaskssql_taskdashboardsubscriptions). - -::: - - -### jobs._name_.tasks.sql_task.dashboard.subscriptions - -**`Type: Sequence`** - -If specified, dashboard snapshots are sent to subscriptions. - - - -:::list-table - -- - Key - - Type - - Description - -- - `destination_id` - - String - - The canonical identifier of the destination to receive email notification. This parameter is mutually exclusive with user_name. You cannot set both destination_id and user_name for subscription notifications. - -- - `user_name` - - String - - The user name to receive the subscription email. This parameter is mutually exclusive with destination_id. 
You cannot set both destination_id and user_name for subscription notifications. - -::: - - -### jobs._name_.tasks.sql_task.file - -**`Type: Map`** - -If file, indicates that this job runs a SQL file in a remote Git repository. - - - -:::list-table - -- - Key - - Type - - Description - -- - `path` - - String - - Path of the SQL file. Must be relative if the source is a remote Git repository and absolute for workspace paths. - -- - `source` - - String - - Optional location type of the SQL file. When set to `WORKSPACE`, the SQL file will be retrieved from the local Databricks workspace. When set to `GIT`, the SQL file will be retrieved from a Git repository defined in `git_source`. If the value is empty, the task will use `GIT` if `git_source` is defined and `WORKSPACE` otherwise. * `WORKSPACE`: SQL file is located in Databricks workspace. * `GIT`: SQL file is located in cloud Git provider. - -::: - - -### jobs._name_.tasks.sql_task.query - -**`Type: Map`** - -If query, indicates that this job must execute a SQL query. - - - -:::list-table - -- - Key - - Type - - Description - -- - `query_id` - - String - - The canonical identifier of the SQL query. - -::: - - -### jobs._name_.tasks.webhook_notifications - -**`Type: Map`** - -A collection of system notification IDs to notify when runs of this task begin or complete. The default behavior is to not send any system notifications. - - - -:::list-table - -- - Key - - Type - - Description - -- - `on_duration_warning_threshold_exceeded` - - Sequence - - An optional list of system notification IDs to call when the duration of a run exceeds the threshold specified for the `RUN_DURATION_SECONDS` metric in the `health` field. A maximum of 3 destinations can be specified for the `on_duration_warning_threshold_exceeded` property. See [\_](#jobsnametaskswebhook_notificationson_duration_warning_threshold_exceeded). - -- - `on_failure` - - Sequence - - An optional list of system notification IDs to call when the run fails. 
A maximum of 3 destinations can be specified for the `on_failure` property. See [\_](#jobsnametaskswebhook_notificationson_failure). - -- - `on_start` - - Sequence - - An optional list of system notification IDs to call when the run starts. A maximum of 3 destinations can be specified for the `on_start` property. See [\_](#jobsnametaskswebhook_notificationson_start). - -- - `on_streaming_backlog_exceeded` - - Sequence - - An optional list of system notification IDs to call when any streaming backlog thresholds are exceeded for any stream. Streaming backlog thresholds can be set in the `health` field using the following metrics: `STREAMING_BACKLOG_BYTES`, `STREAMING_BACKLOG_RECORDS`, `STREAMING_BACKLOG_SECONDS`, or `STREAMING_BACKLOG_FILES`. Alerting is based on the 10-minute average of these metrics. If the issue persists, notifications are resent every 30 minutes. A maximum of 3 destinations can be specified for the `on_streaming_backlog_exceeded` property. See [\_](#jobsnametaskswebhook_notificationson_streaming_backlog_exceeded). - -- - `on_success` - - Sequence - - An optional list of system notification IDs to call when the run completes successfully. A maximum of 3 destinations can be specified for the `on_success` property. See [\_](#jobsnametaskswebhook_notificationson_success). - -::: - - -### jobs._name_.tasks.webhook_notifications.on_duration_warning_threshold_exceeded - -**`Type: Sequence`** - -An optional list of system notification IDs to call when the duration of a run exceeds the threshold specified for the `RUN_DURATION_SECONDS` metric in the `health` field. A maximum of 3 destinations can be specified for the `on_duration_warning_threshold_exceeded` property. - - - -:::list-table - -- - Key - - Type - - Description - -- - `id` - - String - - - -::: - - -### jobs._name_.tasks.webhook_notifications.on_failure - -**`Type: Sequence`** - -An optional list of system notification IDs to call when the run fails. 
A maximum of 3 destinations can be specified for the `on_failure` property. - - - -:::list-table - -- - Key - - Type - - Description - -- - `id` - - String - - - -::: - - -### jobs._name_.tasks.webhook_notifications.on_start - -**`Type: Sequence`** - -An optional list of system notification IDs to call when the run starts. A maximum of 3 destinations can be specified for the `on_start` property. - - - -:::list-table - -- - Key - - Type - - Description - -- - `id` - - String - - - -::: - - -### jobs._name_.tasks.webhook_notifications.on_streaming_backlog_exceeded - -**`Type: Sequence`** - -An optional list of system notification IDs to call when any streaming backlog thresholds are exceeded for any stream. -Streaming backlog thresholds can be set in the `health` field using the following metrics: `STREAMING_BACKLOG_BYTES`, `STREAMING_BACKLOG_RECORDS`, `STREAMING_BACKLOG_SECONDS`, or `STREAMING_BACKLOG_FILES`. -Alerting is based on the 10-minute average of these metrics. If the issue persists, notifications are resent every 30 minutes. -A maximum of 3 destinations can be specified for the `on_streaming_backlog_exceeded` property. - - - -:::list-table - -- - Key - - Type - - Description - -- - `id` - - String - - - -::: - - -### jobs._name_.tasks.webhook_notifications.on_success - -**`Type: Sequence`** - -An optional list of system notification IDs to call when the run completes successfully. A maximum of 3 destinations can be specified for the `on_success` property. - - - -:::list-table - -- - Key - - Type - - Description - -- - `id` - - String - - - -::: - - -### jobs._name_.trigger - -**`Type: Map`** - -A configuration to trigger a run when certain conditions are met. The default behavior is that the job runs only when triggered by clicking “Run Now” in the Jobs UI or sending an API request to `runNow`. - - - -:::list-table - -- - Key - - Type - - Description - -- - `file_arrival` - - Map - - File arrival trigger settings. See [\_](#jobsnametriggerfile_arrival). 
 - -- - `pause_status` - - String - - Whether this trigger is paused or not. - -- - `periodic` - - Map - - Periodic trigger settings. See [\_](#jobsnametriggerperiodic). - -::: - - -### jobs._name_.trigger.file_arrival - -**`Type: Map`** - -File arrival trigger settings. - - - -:::list-table - -- - Key - - Type - - Description - -- - `min_time_between_triggers_seconds` - - Integer - - If set, the trigger starts a run only after the specified amount of time has passed since the last time the trigger fired. The minimum allowed value is 60 seconds. - -- - `url` - - String - - URL to be monitored for file arrivals. The path must point to the root or a subpath of the external location. - -- - `wait_after_last_change_seconds` - - Integer - - If set, the trigger starts a run only after no file activity has occurred for the specified amount of time. This makes it possible to wait for a batch of incoming files to arrive before triggering a run. The minimum allowed value is 60 seconds. - -::: - - -### jobs._name_.trigger.periodic - -**`Type: Map`** - -Periodic trigger settings. - - - -:::list-table - -- - Key - - Type - - Description - -- - `interval` - - Integer - - The interval at which the trigger should run. - -- - `unit` - - String - - The unit of time for the interval. - -::: - - -### jobs._name_.webhook_notifications - -**`Type: Map`** - -A collection of system notification IDs to notify when runs of this job begin or complete. - - - -:::list-table - -- - Key - - Type - - Description - -- - `on_duration_warning_threshold_exceeded` - - Sequence - - An optional list of system notification IDs to call when the duration of a run exceeds the threshold specified for the `RUN_DURATION_SECONDS` metric in the `health` field. A maximum of 3 destinations can be specified for the `on_duration_warning_threshold_exceeded` property. See [\_](#jobsnamewebhook_notificationson_duration_warning_threshold_exceeded).
- -- - `on_failure` - - Sequence - - An optional list of system notification IDs to call when the run fails. A maximum of 3 destinations can be specified for the `on_failure` property. See [\_](#jobsnamewebhook_notificationson_failure). - -- - `on_start` - - Sequence - - An optional list of system notification IDs to call when the run starts. A maximum of 3 destinations can be specified for the `on_start` property. See [\_](#jobsnamewebhook_notificationson_start). - -- - `on_streaming_backlog_exceeded` - - Sequence - - An optional list of system notification IDs to call when any streaming backlog thresholds are exceeded for any stream. Streaming backlog thresholds can be set in the `health` field using the following metrics: `STREAMING_BACKLOG_BYTES`, `STREAMING_BACKLOG_RECORDS`, `STREAMING_BACKLOG_SECONDS`, or `STREAMING_BACKLOG_FILES`. Alerting is based on the 10-minute average of these metrics. If the issue persists, notifications are resent every 30 minutes. A maximum of 3 destinations can be specified for the `on_streaming_backlog_exceeded` property. See [\_](#jobsnamewebhook_notificationson_streaming_backlog_exceeded). - -- - `on_success` - - Sequence - - An optional list of system notification IDs to call when the run completes successfully. A maximum of 3 destinations can be specified for the `on_success` property. See [\_](#jobsnamewebhook_notificationson_success). - -::: - - -### jobs._name_.webhook_notifications.on_duration_warning_threshold_exceeded - -**`Type: Sequence`** - -An optional list of system notification IDs to call when the duration of a run exceeds the threshold specified for the `RUN_DURATION_SECONDS` metric in the `health` field. A maximum of 3 destinations can be specified for the `on_duration_warning_threshold_exceeded` property. 
- - - -:::list-table - -- - Key - - Type - - Description - -- - `id` - - String - - - -::: - - -### jobs._name_.webhook_notifications.on_failure - -**`Type: Sequence`** - -An optional list of system notification IDs to call when the run fails. A maximum of 3 destinations can be specified for the `on_failure` property. - - - -:::list-table - -- - Key - - Type - - Description - -- - `id` - - String - - - -::: - - -### jobs._name_.webhook_notifications.on_start - -**`Type: Sequence`** - -An optional list of system notification IDs to call when the run starts. A maximum of 3 destinations can be specified for the `on_start` property. - - - -:::list-table - -- - Key - - Type - - Description - -- - `id` - - String - - - -::: - - -### jobs._name_.webhook_notifications.on_streaming_backlog_exceeded - -**`Type: Sequence`** - -An optional list of system notification IDs to call when any streaming backlog thresholds are exceeded for any stream. -Streaming backlog thresholds can be set in the `health` field using the following metrics: `STREAMING_BACKLOG_BYTES`, `STREAMING_BACKLOG_RECORDS`, `STREAMING_BACKLOG_SECONDS`, or `STREAMING_BACKLOG_FILES`. -Alerting is based on the 10-minute average of these metrics. If the issue persists, notifications are resent every 30 minutes. -A maximum of 3 destinations can be specified for the `on_streaming_backlog_exceeded` property. - - - -:::list-table - -- - Key - - Type - - Description - -- - `id` - - String - - - -::: - - -### jobs._name_.webhook_notifications.on_success - -**`Type: Sequence`** - -An optional list of system notification IDs to call when the run completes successfully. A maximum of 3 destinations can be specified for the `on_success` property. - - - -:::list-table - -- - Key - - Type - - Description - -- - `id` - - String - - - -::: - - -## model_serving_endpoints - -**`Type: Map`** - -The model_serving_endpoint resource allows you to define [model serving endpoints](/api/workspace/servingendpoints/create). 
See [_](/machine-learning/model-serving/manage-serving-endpoints.md). - -```yaml -model_serving_endpoints: - <model_serving_endpoint-name>: - <model_serving_endpoint-field-name>: <model_serving_endpoint-field-value> -``` - - -:::list-table - -- - Key - - Type - - Description - -- - `ai_gateway` - - Map - - The AI Gateway configuration for the serving endpoint. NOTE: External model, provisioned throughput, and pay-per-token endpoints are fully supported; agent endpoints currently only support inference tables. See [\_](#model_serving_endpointsnameai_gateway). - -- - `budget_policy_id` - - String - - The budget policy to be applied to the serving endpoint. - -- - `config` - - Map - - The core config of the serving endpoint. See [\_](#model_serving_endpointsnameconfig). - -- - `description` - - String - - - -- - `email_notifications` - - Map - - Email notification settings. See [\_](#model_serving_endpointsnameemail_notifications). - -- - `lifecycle` - - Map - - Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. See [\_](#model_serving_endpointsnamelifecycle). - -- - `name` - - String - - The name of the serving endpoint. This field is required and must be unique across a Databricks workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores. - -- - `permissions` - - Sequence - - See [\_](#model_serving_endpointsnamepermissions). - -- - `rate_limits` - - Sequence - - This field is deprecated - -- - `route_optimized` - - Boolean - - Enable route optimization for the serving endpoint. - -- - `tags` - - Sequence - - Tags to be attached to the serving endpoint and automatically propagated to billing logs. See [\_](#model_serving_endpointsnametags).
- -::: - - -**Example** - -The following example defines a Unity Catalog model serving endpoint: - -```yaml -resources: - model_serving_endpoints: - uc_model_serving_endpoint: - name: "uc-model-endpoint" - config: - served_entities: - - entity_name: "myCatalog.mySchema.my-ads-model" - entity_version: "10" - workload_size: "Small" - scale_to_zero_enabled: "true" - traffic_config: - routes: - - served_model_name: "my-ads-model-10" - traffic_percentage: "100" - tags: - - key: "team" - value: "data science" -``` - -### model_serving_endpoints._name_.ai_gateway - -**`Type: Map`** - -The AI Gateway configuration for the serving endpoint. NOTE: External model, provisioned throughput, and pay-per-token endpoints are fully supported; agent endpoints currently only support inference tables. - - - -:::list-table - -- - Key - - Type - - Description - -- - `fallback_config` - - Map - - Configuration for traffic fallback which auto fallbacks to other served entities if the request to a served entity fails with certain error codes, to increase availability. See [\_](#model_serving_endpointsnameai_gatewayfallback_config). - -- - `guardrails` - - Map - - Configuration for AI Guardrails to prevent unwanted data and unsafe data in requests and responses. See [\_](#model_serving_endpointsnameai_gatewayguardrails). - -- - `inference_table_config` - - Map - - Configuration for payload logging using inference tables. Use these tables to monitor and audit data being sent to and received from model APIs and to improve model quality. See [\_](#model_serving_endpointsnameai_gatewayinference_table_config). - -- - `rate_limits` - - Sequence - - Configuration for rate limits which can be set to limit endpoint traffic. See [\_](#model_serving_endpointsnameai_gatewayrate_limits). - -- - `usage_tracking_config` - - Map - - Configuration to enable usage tracking using system tables. These tables allow you to monitor operational usage on endpoints and their associated costs. 
See [\_](#model_serving_endpointsnameai_gatewayusage_tracking_config). - -::: - - -### model_serving_endpoints._name_.ai_gateway.fallback_config - -**`Type: Map`** - -Configuration for traffic fallback which automatically falls back to other served entities if the request to a served -entity fails with certain error codes, to increase availability. - - - -:::list-table - -- - Key - - Type - - Description - -- - `enabled` - - Boolean - - Whether to enable traffic fallback. When a served entity in the serving endpoint returns specific error codes (e.g. 500), the request will automatically be round-robin attempted with other served entities in the same endpoint, following the order of served entity list, until a successful response is returned. If all attempts fail, return the last response with the error code. - -::: - - -### model_serving_endpoints._name_.ai_gateway.guardrails - -**`Type: Map`** - -Configuration for AI Guardrails to prevent unwanted data and unsafe data in requests and responses. - - - -:::list-table - -- - Key - - Type - - Description - -- - `input` - - Map - - Configuration for input guardrail filters. See [\_](#model_serving_endpointsnameai_gatewayguardrailsinput). - -- - `output` - - Map - - Configuration for output guardrail filters. See [\_](#model_serving_endpointsnameai_gatewayguardrailsoutput). - -::: - - -### model_serving_endpoints._name_.ai_gateway.guardrails.input - -**`Type: Map`** - -Configuration for input guardrail filters. - - - -:::list-table - -- - Key - - Type - - Description - -- - `invalid_keywords` - - Sequence - - This field is deprecated - -- - `pii` - - Map - - Configuration for guardrail PII filter. See [\_](#model_serving_endpointsnameai_gatewayguardrailsinputpii). - -- - `safety` - - Boolean - - Indicates whether the safety filter is enabled.
- -- - `valid_topics` - - Sequence - - This field is deprecated - -::: - - -### model_serving_endpoints._name_.ai_gateway.guardrails.input.pii - -**`Type: Map`** - -Configuration for guardrail PII filter. - - - -:::list-table - -- - Key - - Type - - Description - -- - `behavior` - - String - - Configuration for input guardrail filters. - -::: - - -### model_serving_endpoints._name_.ai_gateway.guardrails.output - -**`Type: Map`** - -Configuration for output guardrail filters. - - - -:::list-table - -- - Key - - Type - - Description - -- - `invalid_keywords` - - Sequence - - This field is deprecated - -- - `pii` - - Map - - Configuration for guardrail PII filter. See [\_](#model_serving_endpointsnameai_gatewayguardrailsoutputpii). - -- - `safety` - - Boolean - - Indicates whether the safety filter is enabled. - -- - `valid_topics` - - Sequence - - This field is deprecated - -::: - - -### model_serving_endpoints._name_.ai_gateway.guardrails.output.pii - -**`Type: Map`** - -Configuration for guardrail PII filter. - - - -:::list-table - -- - Key - - Type - - Description - -- - `behavior` - - String - - Configuration for input guardrail filters. - -::: - - -### model_serving_endpoints._name_.ai_gateway.inference_table_config - -**`Type: Map`** - -Configuration for payload logging using inference tables. -Use these tables to monitor and audit data being sent to and received from model APIs and to improve model quality. - - - -:::list-table - -- - Key - - Type - - Description - -- - `catalog_name` - - String - - The name of the catalog in Unity Catalog. Required when enabling inference tables. NOTE: On update, you have to disable inference table first in order to change the catalog name. - -- - `enabled` - - Boolean - - Indicates whether the inference table is enabled. - -- - `schema_name` - - String - - The name of the schema in Unity Catalog. Required when enabling inference tables. 
NOTE: On update, you have to disable inference table first in order to change the schema name. - -- - `table_name_prefix` - - String - - The prefix of the table in Unity Catalog. NOTE: On update, you have to disable inference table first in order to change the prefix name. - -::: - - -### model_serving_endpoints._name_.ai_gateway.rate_limits - -**`Type: Sequence`** - -Configuration for rate limits which can be set to limit endpoint traffic. - - - -:::list-table - -- - Key - - Type - - Description - -- - `calls` - - Integer - - Used to specify how many calls are allowed for a key within the renewal_period. - -- - `key` - - String - - Key field for a rate limit. Currently, 'user', 'user_group', 'service_principal', and 'endpoint' are supported, with 'endpoint' being the default if not specified. - -- - `principal` - - String - - Principal field for a user, user group, or service principal to apply rate limiting to. Accepts a user email, group name, or service principal application ID. - -- - `renewal_period` - - String - - Renewal period field for a rate limit. Currently, only 'minute' is supported. - -- - `tokens` - - Integer - - Used to specify how many tokens are allowed for a key within the renewal_period. - -::: - - -### model_serving_endpoints._name_.ai_gateway.usage_tracking_config - -**`Type: Map`** - -Configuration to enable usage tracking using system tables. -These tables allow you to monitor operational usage on endpoints and their associated costs. - - - -:::list-table - -- - Key - - Type - - Description - -- - `enabled` - - Boolean - - Whether to enable usage tracking. - -::: - - -### model_serving_endpoints._name_.config - -**`Type: Map`** - -The core config of the serving endpoint. - - - -:::list-table - -- - Key - - Type - - Description - -- - `auto_capture_config` - - Map - - Configuration for Inference Tables which automatically logs requests and responses to Unity Catalog.
Note: this field is deprecated for creating new provisioned throughput endpoints, or updating existing provisioned throughput endpoints that never have inference table configured; in these cases please use AI Gateway to manage inference tables. See [\_](#model_serving_endpointsnameconfigauto_capture_config). - -- - `served_entities` - - Sequence - - The list of served entities under the serving endpoint config. See [\_](#model_serving_endpointsnameconfigserved_entities). - -- - `served_models` - - Sequence - - (Deprecated, use served_entities instead) The list of served models under the serving endpoint config. See [\_](#model_serving_endpointsnameconfigserved_models). - -- - `traffic_config` - - Map - - The traffic configuration associated with the serving endpoint config. See [\_](#model_serving_endpointsnameconfigtraffic_config). - -::: - - -### model_serving_endpoints._name_.config.auto_capture_config - -**`Type: Map`** - -Configuration for Inference Tables which automatically logs requests and responses to Unity Catalog. -Note: this field is deprecated for creating new provisioned throughput endpoints, -or updating existing provisioned throughput endpoints that never have inference table configured; -in these cases please use AI Gateway to manage inference tables. - - - -:::list-table - -- - Key - - Type - - Description - -- - `catalog_name` - - String - - The name of the catalog in Unity Catalog. NOTE: On update, you cannot change the catalog name if the inference table is already enabled. - -- - `enabled` - - Boolean - - Indicates whether the inference table is enabled. - -- - `schema_name` - - String - - The name of the schema in Unity Catalog. NOTE: On update, you cannot change the schema name if the inference table is already enabled. - -- - `table_name_prefix` - - String - - The prefix of the table in Unity Catalog. NOTE: On update, you cannot change the prefix name if the inference table is already enabled. 
- -::: - - -### model_serving_endpoints._name_.config.served_entities - -**`Type: Sequence`** - -The list of served entities under the serving endpoint config. - - - -:::list-table - -- - Key - - Type - - Description - -- - `entity_name` - - String - - The name of the entity to be served. The entity may be a model in the Databricks Model Registry, a model in the Unity Catalog (UC), or a function of type FEATURE_SPEC in the UC. If it is a UC object, the full name of the object should be given in the form of **catalog_name.schema_name.model_name**. - -- - `entity_version` - - String - - - -- - `environment_vars` - - Map - - An object containing a set of optional, user-specified environment variable key-value pairs used for serving this entity. Note: this is an experimental feature and subject to change. Example entity environment variables that refer to Databricks secrets: `{"OPENAI_API_KEY": "{{secrets/my_scope/my_key}}", "DATABRICKS_TOKEN": "{{secrets/my_scope2/my_key2}}"}` - -- - `external_model` - - Map - - The external model to be served. NOTE: Only one of external_model and (entity_name, entity_version, workload_size, workload_type, and scale_to_zero_enabled) can be specified with the latter set being used for custom model serving for a Databricks registered model. For an existing endpoint with external_model, it cannot be updated to an endpoint without external_model. If the endpoint is created without external_model, users cannot update it to add external_model later. The task type of all external models within an endpoint must be the same. See [\_](#model_serving_endpointsnameconfigserved_entitiesexternal_model). - -- - `instance_profile_arn` - - String - - ARN of the instance profile that the served entity uses to access AWS resources. - -- - `max_provisioned_concurrency` - - Integer - - The maximum provisioned concurrency that the endpoint can scale up to. Do not use if workload_size is specified. 
- -- - `max_provisioned_throughput` - - Integer - - The maximum tokens per second that the endpoint can scale up to. - -- - `min_provisioned_concurrency` - - Integer - - The minimum provisioned concurrency that the endpoint can scale down to. Do not use if workload_size is specified. - -- - `min_provisioned_throughput` - - Integer - - The minimum tokens per second that the endpoint can scale down to. - -- - `name` - - String - - The name of a served entity. It must be unique across an endpoint. A served entity name can consist of alphanumeric characters, dashes, and underscores. If not specified for an external model, this field defaults to external_model.name, with '.' and ':' replaced with '-', and if not specified for other entities, it defaults to entity_name-entity_version. - -- - `provisioned_model_units` - - Integer - - The number of model units provisioned. - -- - `scale_to_zero_enabled` - - Boolean - - Whether the compute resources for the served entity should scale down to zero. - -- - `workload_size` - - String - - The workload size of the served entity. The workload size corresponds to a range of provisioned concurrency that the compute autoscales between. A single unit of provisioned concurrency can process one request at a time. Valid workload sizes are "Small" (4 - 4 provisioned concurrency), "Medium" (8 - 16 provisioned concurrency), and "Large" (16 - 64 provisioned concurrency). Additional custom workload sizes can also be used when available in the workspace. If scale-to-zero is enabled, the lower bound of the provisioned concurrency for each workload size is 0. Do not use if min_provisioned_concurrency and max_provisioned_concurrency are specified. - -- - `workload_type` - - String - - The workload type of the served entity. The workload type selects which type of compute to use in the endpoint. The default value for this parameter is "CPU". 
For deep learning workloads, GPU acceleration is available by selecting workload types like GPU_SMALL and others. See the available [GPU types](https://docs.databricks.com/en/machine-learning/model-serving/create-manage-serving-endpoints.html#gpu-workload-types). - -::: - - -### model_serving_endpoints._name_.config.served_entities.external_model - -**`Type: Map`** - -The external model to be served. NOTE: Only one of external_model and (entity_name, entity_version, workload_size, workload_type, and scale_to_zero_enabled) can be specified with the latter set being used for custom model serving for a Databricks registered model. For an existing endpoint with external_model, it cannot be updated to an endpoint without external_model. If the endpoint is created without external_model, users cannot update it to add external_model later. The task type of all external models within an endpoint must be the same. - - - -:::list-table - -- - Key - - Type - - Description - -- - `ai21labs_config` - - Map - - AI21Labs Config. Only required if the provider is 'ai21labs'. See [\_](#model_serving_endpointsnameconfigserved_entitiesexternal_modelai21labs_config). - -- - `amazon_bedrock_config` - - Map - - Amazon Bedrock Config. Only required if the provider is 'amazon-bedrock'. See [\_](#model_serving_endpointsnameconfigserved_entitiesexternal_modelamazon_bedrock_config). - -- - `anthropic_config` - - Map - - Anthropic Config. Only required if the provider is 'anthropic'. See [\_](#model_serving_endpointsnameconfigserved_entitiesexternal_modelanthropic_config). - -- - `cohere_config` - - Map - - Cohere Config. Only required if the provider is 'cohere'. See [\_](#model_serving_endpointsnameconfigserved_entitiesexternal_modelcohere_config). - -- - `custom_provider_config` - - Map - - Custom Provider Config. Only required if the provider is 'custom'. See [\_](#model_serving_endpointsnameconfigserved_entitiesexternal_modelcustom_provider_config). 
- -- - `databricks_model_serving_config` - - Map - - Databricks Model Serving Config. Only required if the provider is 'databricks-model-serving'. See [\_](#model_serving_endpointsnameconfigserved_entitiesexternal_modeldatabricks_model_serving_config). - -- - `google_cloud_vertex_ai_config` - - Map - - Google Cloud Vertex AI Config. Only required if the provider is 'google-cloud-vertex-ai'. See [\_](#model_serving_endpointsnameconfigserved_entitiesexternal_modelgoogle_cloud_vertex_ai_config). - -- - `name` - - String - - The name of the external model. - -- - `openai_config` - - Map - - OpenAI Config. Only required if the provider is 'openai'. See [\_](#model_serving_endpointsnameconfigserved_entitiesexternal_modelopenai_config). - -- - `palm_config` - - Map - - PaLM Config. Only required if the provider is 'palm'. See [\_](#model_serving_endpointsnameconfigserved_entitiesexternal_modelpalm_config). - -- - `provider` - - String - - The name of the provider for the external model. Currently, the supported providers are 'ai21labs', 'anthropic', 'amazon-bedrock', 'cohere', 'databricks-model-serving', 'google-cloud-vertex-ai', 'openai', 'palm', and 'custom'. - -- - `task` - - String - - The task type of the external model. - -::: - - -### model_serving_endpoints._name_.config.served_entities.external_model.ai21labs_config - -**`Type: Map`** - -AI21Labs Config. Only required if the provider is 'ai21labs'. - - - -:::list-table - -- - Key - - Type - - Description - -- - `ai21labs_api_key` - - String - - The Databricks secret key reference for an AI21 Labs API key. If you prefer to paste your API key directly, see `ai21labs_api_key_plaintext`. You must provide an API key using one of the following fields: `ai21labs_api_key` or `ai21labs_api_key_plaintext`. - -- - `ai21labs_api_key_plaintext` - - String - - An AI21 Labs API key provided as a plaintext string. If you prefer to reference your key using Databricks Secrets, see `ai21labs_api_key`. 
You must provide an API key using one of the following fields: `ai21labs_api_key` or `ai21labs_api_key_plaintext`. - -::: - - -### model_serving_endpoints._name_.config.served_entities.external_model.amazon_bedrock_config - -**`Type: Map`** - -Amazon Bedrock Config. Only required if the provider is 'amazon-bedrock'. - - - -:::list-table - -- - Key - - Type - - Description - -- - `aws_access_key_id` - - String - - The Databricks secret key reference for an AWS access key ID with permissions to interact with Bedrock services. If you prefer to paste your API key directly, see `aws_access_key_id_plaintext`. You must provide an API key using one of the following fields: `aws_access_key_id` or `aws_access_key_id_plaintext`. - -- - `aws_access_key_id_plaintext` - - String - - An AWS access key ID with permissions to interact with Bedrock services provided as a plaintext string. If you prefer to reference your key using Databricks Secrets, see `aws_access_key_id`. You must provide an API key using one of the following fields: `aws_access_key_id` or `aws_access_key_id_plaintext`. - -- - `aws_region` - - String - - The AWS region to use. Bedrock has to be enabled there. - -- - `aws_secret_access_key` - - String - - The Databricks secret key reference for an AWS secret access key paired with the access key ID, with permissions to interact with Bedrock services. If you prefer to paste your API key directly, see `aws_secret_access_key_plaintext`. You must provide an API key using one of the following fields: `aws_secret_access_key` or `aws_secret_access_key_plaintext`. - -- - `aws_secret_access_key_plaintext` - - String - - An AWS secret access key paired with the access key ID, with permissions to interact with Bedrock services provided as a plaintext string. If you prefer to reference your key using Databricks Secrets, see `aws_secret_access_key`. You must provide an API key using one of the following fields: `aws_secret_access_key` or `aws_secret_access_key_plaintext`. 
- -- - `bedrock_provider` - - String - - The underlying provider in Amazon Bedrock. Supported values (case insensitive) include: Anthropic, Cohere, AI21Labs, Amazon. - -- - `instance_profile_arn` - - String - - ARN of the instance profile that the external model will use to access AWS resources. You must authenticate using an instance profile or access keys. If you prefer to authenticate using access keys, see `aws_access_key_id`, `aws_access_key_id_plaintext`, `aws_secret_access_key` and `aws_secret_access_key_plaintext`. - -::: - - -### model_serving_endpoints._name_.config.served_entities.external_model.anthropic_config - -**`Type: Map`** - -Anthropic Config. Only required if the provider is 'anthropic'. - - - -:::list-table - -- - Key - - Type - - Description - -- - `anthropic_api_key` - - String - - The Databricks secret key reference for an Anthropic API key. If you prefer to paste your API key directly, see `anthropic_api_key_plaintext`. You must provide an API key using one of the following fields: `anthropic_api_key` or `anthropic_api_key_plaintext`. - -- - `anthropic_api_key_plaintext` - - String - - The Anthropic API key provided as a plaintext string. If you prefer to reference your key using Databricks Secrets, see `anthropic_api_key`. You must provide an API key using one of the following fields: `anthropic_api_key` or `anthropic_api_key_plaintext`. - -::: - - -### model_serving_endpoints._name_.config.served_entities.external_model.cohere_config - -**`Type: Map`** - -Cohere Config. Only required if the provider is 'cohere'. - - - -:::list-table - -- - Key - - Type - - Description - -- - `cohere_api_base` - - String - - This is an optional field to provide a customized base URL for the Cohere API. If left unspecified, the standard Cohere base URL is used. - -- - `cohere_api_key` - - String - - The Databricks secret key reference for a Cohere API key. If you prefer to paste your API key directly, see `cohere_api_key_plaintext`. 
You must provide an API key using one of the following fields: `cohere_api_key` or `cohere_api_key_plaintext`. - -- - `cohere_api_key_plaintext` - - String - - The Cohere API key provided as a plaintext string. If you prefer to reference your key using Databricks Secrets, see `cohere_api_key`. You must provide an API key using one of the following fields: `cohere_api_key` or `cohere_api_key_plaintext`. - -::: - - -### model_serving_endpoints._name_.config.served_entities.external_model.custom_provider_config - -**`Type: Map`** - -Custom Provider Config. Only required if the provider is 'custom'. - - - -:::list-table - -- - Key - - Type - - Description - -- - `api_key_auth` - - Map - - This is a field to provide API key authentication for the custom provider API. You can only specify one authentication method. See [\_](#model_serving_endpointsnameconfigserved_entitiesexternal_modelcustom_provider_configapi_key_auth). - -- - `bearer_token_auth` - - Map - - This is a field to provide bearer token authentication for the custom provider API. You can only specify one authentication method. See [\_](#model_serving_endpointsnameconfigserved_entitiesexternal_modelcustom_provider_configbearer_token_auth). - -- - `custom_provider_url` - - String - - This is a field to provide the URL of the custom provider API. - -::: - - -### model_serving_endpoints._name_.config.served_entities.external_model.custom_provider_config.api_key_auth - -**`Type: Map`** - -This is a field to provide API key authentication for the custom provider API. -You can only specify one authentication method. - - - -:::list-table - -- - Key - - Type - - Description - -- - `key` - - String - - The name of the API key parameter used for authentication. - -- - `value` - - String - - The Databricks secret key reference for an API Key. If you prefer to paste your token directly, see `value_plaintext`. - -- - `value_plaintext` - - String - - The API Key provided as a plaintext string. 
If you prefer to reference your token using Databricks Secrets, see `value`. - -::: - - -### model_serving_endpoints._name_.config.served_entities.external_model.custom_provider_config.bearer_token_auth - -**`Type: Map`** - -This is a field to provide bearer token authentication for the custom provider API. -You can only specify one authentication method. - - - -:::list-table - -- - Key - - Type - - Description - -- - `token` - - String - - The Databricks secret key reference for a token. If you prefer to paste your token directly, see `token_plaintext`. - -- - `token_plaintext` - - String - - The token provided as a plaintext string. If you prefer to reference your token using Databricks Secrets, see `token`. - -::: - - -### model_serving_endpoints._name_.config.served_entities.external_model.databricks_model_serving_config - -**`Type: Map`** - -Databricks Model Serving Config. Only required if the provider is 'databricks-model-serving'. - - - -:::list-table - -- - Key - - Type - - Description - -- - `databricks_api_token` - - String - - The Databricks secret key reference for a Databricks API token that corresponds to a user or service principal with Can Query access to the model serving endpoint pointed to by this external model. If you prefer to paste your API key directly, see `databricks_api_token_plaintext`. You must provide an API key using one of the following fields: `databricks_api_token` or `databricks_api_token_plaintext`. - -- - `databricks_api_token_plaintext` - - String - - The Databricks API token that corresponds to a user or service principal with Can Query access to the model serving endpoint pointed to by this external model provided as a plaintext string. If you prefer to reference your key using Databricks Secrets, see `databricks_api_token`. You must provide an API key using one of the following fields: `databricks_api_token` or `databricks_api_token_plaintext`. 
- -- - `databricks_workspace_url` - - String - - The URL of the Databricks workspace containing the model serving endpoint pointed to by this external model. - -::: - - -### model_serving_endpoints._name_.config.served_entities.external_model.google_cloud_vertex_ai_config - -**`Type: Map`** - -Google Cloud Vertex AI Config. Only required if the provider is 'google-cloud-vertex-ai'. - - - -:::list-table - -- - Key - - Type - - Description - -- - `private_key` - - String - - The Databricks secret key reference for a private key for the service account which has access to the Google Cloud Vertex AI Service. See [Best practices for managing service account keys]. If you prefer to paste your API key directly, see `private_key_plaintext`. You must provide an API key using one of the following fields: `private_key` or `private_key_plaintext` [Best practices for managing service account keys]: https://cloud.google.com/iam/docs/best-practices-for-managing-service-account-keys - -- - `private_key_plaintext` - - String - - The private key for the service account which has access to the Google Cloud Vertex AI Service provided as a plaintext secret. See [Best practices for managing service account keys]. If you prefer to reference your key using Databricks Secrets, see `private_key`. You must provide an API key using one of the following fields: `private_key` or `private_key_plaintext`. [Best practices for managing service account keys]: https://cloud.google.com/iam/docs/best-practices-for-managing-service-account-keys - -- - `project_id` - - String - - This is the Google Cloud project id that the service account is associated with. - -- - `region` - - String - - This is the region for the Google Cloud Vertex AI Service. See [supported regions] for more details. Some models are only available in specific regions. 
[supported regions]: https://cloud.google.com/vertex-ai/docs/general/locations - -::: - - -### model_serving_endpoints._name_.config.served_entities.external_model.openai_config - -**`Type: Map`** - -OpenAI Config. Only required if the provider is 'openai'. - - - -:::list-table - -- - Key - - Type - - Description - -- - `microsoft_entra_client_id` - - String - - This field is only required for Azure AD OpenAI and is the Microsoft Entra Client ID. - -- - `microsoft_entra_client_secret` - - String - - The Databricks secret key reference for a client secret used for Microsoft Entra ID authentication. If you prefer to paste your client secret directly, see `microsoft_entra_client_secret_plaintext`. You must provide an API key using one of the following fields: `microsoft_entra_client_secret` or `microsoft_entra_client_secret_plaintext`. - -- - `microsoft_entra_client_secret_plaintext` - - String - - The client secret used for Microsoft Entra ID authentication provided as a plaintext string. If you prefer to reference your key using Databricks Secrets, see `microsoft_entra_client_secret`. You must provide an API key using one of the following fields: `microsoft_entra_client_secret` or `microsoft_entra_client_secret_plaintext`. - -- - `microsoft_entra_tenant_id` - - String - - This field is only required for Azure AD OpenAI and is the Microsoft Entra Tenant ID. - -- - `openai_api_base` - - String - - This is a field to provide a customized base URL for the OpenAI API. For Azure OpenAI, this field is required, and is the base URL for the Azure OpenAI API service provided by Azure. For other OpenAI API types, this field is optional, and if left unspecified, the standard OpenAI base URL is used. - -- - `openai_api_key` - - String - - The Databricks secret key reference for an OpenAI API key using the OpenAI or Azure service. If you prefer to paste your API key directly, see `openai_api_key_plaintext`.
You must provide an API key using one of the following fields: `openai_api_key` or `openai_api_key_plaintext`. - -- - `openai_api_key_plaintext` - - String - - The OpenAI API key using the OpenAI or Azure service provided as a plaintext string. If you prefer to reference your key using Databricks Secrets, see `openai_api_key`. You must provide an API key using one of the following fields: `openai_api_key` or `openai_api_key_plaintext`. - -- - `openai_api_type` - - String - - This is an optional field to specify the type of OpenAI API to use. For Azure OpenAI, this field is required, and adjust this parameter to represent the preferred security access validation protocol. For access token validation, use azure. For authentication using Azure Active Directory (Azure AD), use azuread. - -- - `openai_api_version` - - String - - This is an optional field to specify the OpenAI API version. For Azure OpenAI, this field is required, and is the version of the Azure OpenAI service to utilize, specified by a date. - -- - `openai_deployment_name` - - String - - This field is only required for Azure OpenAI and is the name of the deployment resource for the Azure OpenAI service. - -- - `openai_organization` - - String - - This is an optional field to specify the organization in OpenAI or Azure OpenAI. - -::: - - -### model_serving_endpoints._name_.config.served_entities.external_model.palm_config - -**`Type: Map`** - -PaLM Config. Only required if the provider is 'palm'. - - - -:::list-table - -- - Key - - Type - - Description - -- - `palm_api_key` - - String - - The Databricks secret key reference for a PaLM API key. If you prefer to paste your API key directly, see `palm_api_key_plaintext`. You must provide an API key using one of the following fields: `palm_api_key` or `palm_api_key_plaintext`. - -- - `palm_api_key_plaintext` - - String - - The PaLM API key provided as a plaintext string. If you prefer to reference your key using Databricks Secrets, see `palm_api_key`.
You must provide an API key using one of the following fields: `palm_api_key` or `palm_api_key_plaintext`. - -::: - - -### model_serving_endpoints._name_.config.served_models - -**`Type: Sequence`** - -(Deprecated, use served_entities instead) The list of served models under the serving endpoint config. - - - -:::list-table - -- - Key - - Type - - Description - -- - `environment_vars` - - Map - - An object containing a set of optional, user-specified environment variable key-value pairs used for serving this entity. Note: this is an experimental feature and subject to change. Example entity environment variables that refer to Databricks secrets: `{"OPENAI_API_KEY": "{{secrets/my_scope/my_key}}", "DATABRICKS_TOKEN": "{{secrets/my_scope2/my_key2}}"}` - -- - `instance_profile_arn` - - String - - ARN of the instance profile that the served entity uses to access AWS resources. - -- - `max_provisioned_concurrency` - - Integer - - The maximum provisioned concurrency that the endpoint can scale up to. Do not use if workload_size is specified. - -- - `max_provisioned_throughput` - - Integer - - The maximum tokens per second that the endpoint can scale up to. - -- - `min_provisioned_concurrency` - - Integer - - The minimum provisioned concurrency that the endpoint can scale down to. Do not use if workload_size is specified. - -- - `min_provisioned_throughput` - - Integer - - The minimum tokens per second that the endpoint can scale down to. - -- - `model_name` - - String - - - -- - `model_version` - - String - - - -- - `name` - - String - - The name of a served entity. It must be unique across an endpoint. A served entity name can consist of alphanumeric characters, dashes, and underscores. If not specified for an external model, this field defaults to external_model.name, with '.' and ':' replaced with '-', and if not specified for other entities, it defaults to entity_name-entity_version. 
- -- - `provisioned_model_units` - - Integer - - The number of model units provisioned. - -- - `scale_to_zero_enabled` - - Boolean - - Whether the compute resources for the served entity should scale down to zero. - -- - `workload_size` - - String - - The workload size of the served entity. The workload size corresponds to a range of provisioned concurrency that the compute autoscales between. A single unit of provisioned concurrency can process one request at a time. Valid workload sizes are "Small" (4 - 4 provisioned concurrency), "Medium" (8 - 16 provisioned concurrency), and "Large" (16 - 64 provisioned concurrency). Additional custom workload sizes can also be used when available in the workspace. If scale-to-zero is enabled, the lower bound of the provisioned concurrency for each workload size is 0. Do not use if min_provisioned_concurrency and max_provisioned_concurrency are specified. - -- - `workload_type` - - String - - The workload type of the served entity. The workload type selects which type of compute to use in the endpoint. The default value for this parameter is "CPU". For deep learning workloads, GPU acceleration is available by selecting workload types like GPU_SMALL and others. See the available [GPU types](https://docs.databricks.com/en/machine-learning/model-serving/create-manage-serving-endpoints.html#gpu-workload-types). - -::: - - -### model_serving_endpoints._name_.config.traffic_config - -**`Type: Map`** - -The traffic configuration associated with the serving endpoint config. - - - -:::list-table - -- - Key - - Type - - Description - -- - `routes` - - Sequence - - The list of routes that define traffic to each served entity. See [\_](#model_serving_endpointsnameconfigtraffic_configroutes). - -::: - - -### model_serving_endpoints._name_.config.traffic_config.routes - -**`Type: Sequence`** - -The list of routes that define traffic to each served entity. 
- - - -:::list-table - -- - Key - - Type - - Description - -- - `served_entity_name` - - String - - - -- - `served_model_name` - - String - - The name of the served model this route configures traffic for. - -- - `traffic_percentage` - - Integer - - The percentage of endpoint traffic to send to this route. It must be an integer between 0 and 100 inclusive. - -::: - - -### model_serving_endpoints._name_.email_notifications - -**`Type: Map`** - -Email notification settings. - - - -:::list-table - -- - Key - - Type - - Description - -- - `on_update_failure` - - Sequence - - A list of email addresses to be notified when an endpoint fails to update its configuration or state. - -- - `on_update_success` - - Sequence - - A list of email addresses to be notified when an endpoint successfully updates its configuration or state. - -::: - - -### model_serving_endpoints._name_.lifecycle - -**`Type: Map`** - -Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. - - - -:::list-table - -- - Key - - Type - - Description - -- - `prevent_destroy` - - Boolean - - Lifecycle setting to prevent the resource from being destroyed. - -::: - - -### model_serving_endpoints._name_.permissions - -**`Type: Sequence`** - - - - - -:::list-table - -- - Key - - Type - - Description - -- - `group_name` - - String - - - -- - `level` - - String - - - -- - `service_principal_name` - - String - - - -- - `user_name` - - String - - - -::: - - -### model_serving_endpoints._name_.tags - -**`Type: Sequence`** - -Tags to be attached to the serving endpoint and automatically propagated to billing logs. - - - -:::list-table - -- - Key - - Type - - Description - -- - `key` - - String - - Key field for a serving endpoint tag. - -- - `value` - - String - - Optional value field for a serving endpoint tag. 
- -::: - - -## models - -**`Type: Map`** - -The model resource allows you to define [legacy models](/api/workspace/modelregistry/createmodel) in bundles. Databricks recommends you use Unity Catalog [registered models](#registered-model) instead. - -```yaml -models: - : - : -``` - - -:::list-table - -- - Key - - Type - - Description - -- - `description` - - String - - Optional description for registered model. - -- - `lifecycle` - - Map - - Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. See [\_](#modelsnamelifecycle). - -- - `name` - - String - - Register models under this name - -- - `permissions` - - Sequence - - See [\_](#modelsnamepermissions). - -- - `tags` - - Sequence - - Additional metadata for registered model. See [\_](#modelsnametags). - -::: - - -### models._name_.lifecycle - -**`Type: Map`** - -Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. - - - -:::list-table - -- - Key - - Type - - Description - -- - `prevent_destroy` - - Boolean - - Lifecycle setting to prevent the resource from being destroyed. - -::: - - -### models._name_.permissions - -**`Type: Sequence`** - - - - - -:::list-table - -- - Key - - Type - - Description - -- - `group_name` - - String - - - -- - `level` - - String - - - -- - `service_principal_name` - - String - - - -- - `user_name` - - String - - - -::: - - -### models._name_.tags - -**`Type: Sequence`** - -Additional metadata for registered model. - - - -:::list-table - -- - Key - - Type - - Description - -- - `key` - - String - - The tag key. - -- - `value` - - String - - The tag value. - -::: - - -## pipelines - -**`Type: Map`** - -The pipeline resource allows you to create Delta Live Tables [pipelines](/api/workspace/pipelines/create). For information about pipelines, see [_](/dlt/index.md). 
For a tutorial that uses the Databricks Asset Bundles template to create a pipeline, see [_](/dev-tools/bundles/pipelines-tutorial.md). - -```yaml -pipelines: - : - : -``` - - -:::list-table - -- - Key - - Type - - Description - -- - `allow_duplicate_names` - - Boolean - - If false, deployment will fail if name conflicts with that of another pipeline. - -- - `catalog` - - String - - A catalog in Unity Catalog to publish data from this pipeline to. If `target` is specified, tables in this pipeline are published to a `target` schema inside `catalog` (for example, `catalog`.`target`.`table`). If `target` is not specified, no data is published to Unity Catalog. - -- - `channel` - - String - - DLT Release Channel that specifies which version to use. - -- - `clusters` - - Sequence - - Cluster settings for this pipeline deployment. See [\_](#pipelinesnameclusters). - -- - `configuration` - - Map - - String-String configuration for this pipeline execution. - -- - `continuous` - - Boolean - - Whether the pipeline is continuous or triggered. This replaces `trigger`. - -- - `deployment` - - Map - - Deployment type of this pipeline. See [\_](#pipelinesnamedeployment). - -- - `development` - - Boolean - - Whether the pipeline is in Development mode. Defaults to false. - -- - `dry_run` - - Boolean - - - -- - `edition` - - String - - Pipeline product edition. - -- - `environment` - - Map - - Environment specification for this pipeline used to install dependencies. See [\_](#pipelinesnameenvironment). - -- - `event_log` - - Map - - Event log configuration for this pipeline. See [\_](#pipelinesnameevent_log). - -- - `filters` - - Map - - Filters on which Pipeline packages to include in the deployed graph. See [\_](#pipelinesnamefilters). - -- - `id` - - String - - Unique identifier for this pipeline. - -- - `ingestion_definition` - - Map - - The configuration for a managed ingestion pipeline. 
These settings cannot be used with the 'libraries', 'schema', 'target', or 'catalog' settings. See [\_](#pipelinesnameingestion_definition). - -- - `libraries` - - Sequence - - Libraries or code needed by this deployment. See [\_](#pipelinesnamelibraries). - -- - `lifecycle` - - Map - - Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. See [\_](#pipelinesnamelifecycle). - -- - `name` - - String - - Friendly identifier for this pipeline. - -- - `notifications` - - Sequence - - List of notification settings for this pipeline. See [\_](#pipelinesnamenotifications). - -- - `permissions` - - Sequence - - See [\_](#pipelinesnamepermissions). - -- - `photon` - - Boolean - - Whether Photon is enabled for this pipeline. - -- - `root_path` - - String - - Root path for this pipeline. This is used as the root directory when editing the pipeline in the Databricks user interface and it is added to sys.path when executing Python sources during pipeline execution. - -- - `schema` - - String - - The default schema (database) where tables are read from or published to. - -- - `serverless` - - Boolean - - Whether serverless compute is enabled for this pipeline. - -- - `storage` - - String - - DBFS root directory for storing checkpoints and tables. - -- - `tags` - - Map - - A map of tags associated with the pipeline. These are forwarded to the cluster as cluster tags, and are therefore subject to the same limitations. A maximum of 25 tags can be added to the pipeline. 
- -- - `target` - - String - - This field is deprecated - -- - `trigger` - - Map - - Use continuous instead - -::: - - -**Example** - -The following example defines a pipeline with the resource key `hello-pipeline`: - -```yaml -resources: - pipelines: - hello-pipeline: - name: hello-pipeline - clusters: - - label: default - num_workers: 1 - development: true - continuous: false - channel: CURRENT - edition: CORE - photon: false - libraries: - - notebook: - path: ./pipeline.py -``` - -### pipelines._name_.clusters - -**`Type: Sequence`** - -Cluster settings for this pipeline deployment. - - - -:::list-table - -- - Key - - Type - - Description - -- - `apply_policy_default_values` - - Boolean - - Note: This field won't be persisted. Only API users will check this field. - -- - `autoscale` - - Map - - Parameters needed in order to automatically scale clusters up and down based on load. Note: autoscaling works best with DB runtime versions 3.0 or later. See [\_](#pipelinesnameclustersautoscale). - -- - `aws_attributes` - - Map - - Attributes related to clusters running on Amazon Web Services. If not specified at cluster creation, a set of default values will be used. See [\_](#pipelinesnameclustersaws_attributes). - -- - `azure_attributes` - - Map - - Attributes related to clusters running on Microsoft Azure. If not specified at cluster creation, a set of default values will be used. See [\_](#pipelinesnameclustersazure_attributes). - -- - `cluster_log_conf` - - Map - - The configuration for delivering spark logs to a long-term storage destination. Only dbfs destinations are supported. Only one destination can be specified for one cluster. If the conf is given, the logs will be delivered to the destination every `5 mins`. The destination of driver logs is `$destination/$clusterId/driver`, while the destination of executor logs is `$destination/$clusterId/executor`. See [\_](#pipelinesnameclusterscluster_log_conf). 
- -- - `custom_tags` - - Map - - Additional tags for cluster resources. Databricks will tag all cluster resources (e.g., AWS instances and EBS volumes) with these tags in addition to `default_tags`. Notes: - Currently, Databricks allows at most 45 custom tags - Clusters can only reuse cloud resources if the resources' tags are a subset of the cluster tags - -- - `driver_instance_pool_id` - - String - - The optional ID of the instance pool for the driver of the cluster belongs. The pool cluster uses the instance pool with id (instance_pool_id) if the driver pool is not assigned. - -- - `driver_node_type_id` - - String - - The node type of the Spark driver. Note that this field is optional; if unset, the driver node type will be set as the same value as `node_type_id` defined above. - -- - `enable_local_disk_encryption` - - Boolean - - Whether to enable local disk encryption for the cluster. - -- - `gcp_attributes` - - Map - - Attributes related to clusters running on Google Cloud Platform. If not specified at cluster creation, a set of default values will be used. See [\_](#pipelinesnameclustersgcp_attributes). - -- - `init_scripts` - - Sequence - - The configuration for storing init scripts. Any number of destinations can be specified. The scripts are executed sequentially in the order provided. If `cluster_log_conf` is specified, init script logs are sent to `//init_scripts`. See [\_](#pipelinesnameclustersinit_scripts). - -- - `instance_pool_id` - - String - - The optional ID of the instance pool to which the cluster belongs. - -- - `label` - - String - - A label for the cluster specification, either `default` to configure the default cluster, or `maintenance` to configure the maintenance cluster. This field is optional. The default value is `default`. - -- - `node_type_id` - - String - - This field encodes, through a single value, the resources available to each of the Spark nodes in this cluster. 
For example, the Spark nodes can be provisioned and optimized for memory or compute intensive workloads. A list of available node types can be retrieved by using the :method:clusters/listNodeTypes API call. - -- - `num_workers` - - Integer - - Number of worker nodes that this cluster should have. A cluster has one Spark Driver and `num_workers` Executors for a total of `num_workers` + 1 Spark nodes. Note: When reading the properties of a cluster, this field reflects the desired number of workers rather than the actual current number of workers. For instance, if a cluster is resized from 5 to 10 workers, this field will immediately be updated to reflect the target size of 10 workers, whereas the workers listed in `spark_info` will gradually increase from 5 to 10 as the new nodes are provisioned. - -- - `policy_id` - - String - - The ID of the cluster policy used to create the cluster if applicable. - -- - `spark_conf` - - Map - - An object containing a set of optional, user-specified Spark configuration key-value pairs. See :method:clusters/create for more details. - -- - `spark_env_vars` - - Map - - An object containing a set of optional, user-specified environment variable key-value pairs. Please note that key-value pair of the form (X,Y) will be exported as is (i.e., `export X='Y'`) while launching the driver and workers. In order to specify an additional set of `SPARK_DAEMON_JAVA_OPTS`, we recommend appending them to `$SPARK_DAEMON_JAVA_OPTS` as shown in the example below. This ensures that all default databricks managed environmental variables are included as well. Example Spark environment variables: `{"SPARK_WORKER_MEMORY": "28000m", "SPARK_LOCAL_DIRS": "/local_disk0"}` or `{"SPARK_DAEMON_JAVA_OPTS": "$SPARK_DAEMON_JAVA_OPTS -Dspark.shuffle.service.enabled=true"}` - -- - `ssh_public_keys` - - Sequence - - SSH public key contents that will be added to each Spark node in this cluster. 
The corresponding private keys can be used to login with the user name `ubuntu` on port `2200`. Up to 10 keys can be specified. - -::: - - -### pipelines._name_.clusters.autoscale - -**`Type: Map`** - -Parameters needed in order to automatically scale clusters up and down based on load. -Note: autoscaling works best with DB runtime versions 3.0 or later. - - - -:::list-table - -- - Key - - Type - - Description - -- - `max_workers` - - Integer - - The maximum number of workers to which the cluster can scale up when overloaded. `max_workers` must be strictly greater than `min_workers`. - -- - `min_workers` - - Integer - - The minimum number of workers the cluster can scale down to when underutilized. It is also the initial number of workers the cluster will have after creation. - -- - `mode` - - String - - Databricks Enhanced Autoscaling optimizes cluster utilization by automatically allocating cluster resources based on workload volume, with minimal impact to the data processing latency of your pipelines. Enhanced Autoscaling is available for `updates` clusters only. The legacy autoscaling feature is used for `maintenance` clusters. - -::: - - -### pipelines._name_.clusters.aws_attributes - -**`Type: Map`** - -Attributes related to clusters running on Amazon Web Services. -If not specified at cluster creation, a set of default values will be used. - - - -:::list-table - -- - Key - - Type - - Description - -- - `availability` - - String - - Availability type used for all subsequent nodes past the `first_on_demand` ones. Note: If `first_on_demand` is zero, this availability type will be used for the entire cluster. - -- - `ebs_volume_count` - - Integer - - The number of volumes launched for each instance. Users can choose up to 10 volumes. This feature is only enabled for supported node types. Legacy node types cannot specify custom EBS volumes. For node types with no instance store, at least one EBS volume needs to be specified; otherwise, cluster creation will fail. 
These EBS volumes will be mounted at `/ebs0`, `/ebs1`, etc. Instance store volumes will be mounted at `/local_disk0`, `/local_disk1`, etc. If EBS volumes are attached, Databricks will configure Spark to use only the EBS volumes for scratch storage because heterogeneously sized scratch devices can lead to inefficient disk utilization. If no EBS volumes are attached, Databricks will configure Spark to use instance store volumes. Please note that if EBS volumes are specified, then the Spark configuration `spark.local.dir` will be overridden. - -- - `ebs_volume_iops` - - Integer - - If using gp3 volumes, what IOPS to use for the disk. If this is not set, the maximum performance of a gp2 volume with the same volume size will be used. - -- - `ebs_volume_size` - - Integer - - The size of each EBS volume (in GiB) launched for each instance. For general purpose SSD, this value must be within the range 100 - 4096. For throughput optimized HDD, this value must be within the range 500 - 4096. - -- - `ebs_volume_throughput` - - Integer - - If using gp3 volumes, what throughput to use for the disk. If this is not set, the maximum performance of a gp2 volume with the same volume size will be used. - -- - `ebs_volume_type` - - String - - All EBS volume types that Databricks supports. See https://aws.amazon.com/ebs/details/ for details. - -- - `first_on_demand` - - Integer - - The first `first_on_demand` nodes of the cluster will be placed on on-demand instances. If this value is greater than 0, the cluster driver node in particular will be placed on an on-demand instance. If this value is greater than or equal to the current cluster size, all nodes will be placed on on-demand instances. If this value is less than the current cluster size, `first_on_demand` nodes will be placed on on-demand instances and the remainder will be placed on `availability` instances. Note that this value does not affect cluster size and cannot currently be mutated over the lifetime of a cluster.
- -- - `instance_profile_arn` - - String - - Nodes for this cluster will only be placed on AWS instances with this instance profile. If omitted, nodes will be placed on instances without an IAM instance profile. The instance profile must have previously been added to the Databricks environment by an account administrator. This feature may only be available to certain customer plans. - -- - `spot_bid_price_percent` - - Integer - - The bid price for AWS spot instances, as a percentage of the corresponding instance type's on-demand price. For example, if this field is set to 50, and the cluster needs a new `r3.xlarge` spot instance, then the bid price is half of the price of on-demand `r3.xlarge` instances. Similarly, if this field is set to 200, the bid price is twice the price of on-demand `r3.xlarge` instances. If not specified, the default value is 100. When spot instances are requested for this cluster, only spot instances whose bid price percentage matches this field will be considered. Note that, for safety, we enforce this field to be no more than 10000. - -- - `zone_id` - - String - - Identifier for the availability zone/datacenter in which the cluster resides. This string will be of a form like "us-west-2a". The provided availability zone must be in the same region as the Databricks deployment. For example, "us-west-2a" is not a valid zone id if the Databricks deployment resides in the "us-east-1" region. This is an optional field at cluster creation, and if not specified, a default zone will be used. If the zone specified is "auto", will try to place cluster in a zone with high availability, and will retry placement in a different AZ if there is not enough capacity. The list of available zones as well as the default value can be found by using the `List Zones` method. - -::: - - -### pipelines._name_.clusters.azure_attributes - -**`Type: Map`** - -Attributes related to clusters running on Microsoft Azure.
-If not specified at cluster creation, a set of default values will be used. - - - -:::list-table - -- - Key - - Type - - Description - -- - `availability` - - String - - Availability type used for all subsequent nodes past the `first_on_demand` ones. Note: If `first_on_demand` is zero, this availability type will be used for the entire cluster. - -- - `first_on_demand` - - Integer - - The first `first_on_demand` nodes of the cluster will be placed on on-demand instances. This value should be greater than 0, to make sure the cluster driver node is placed on an on-demand instance. If this value is greater than or equal to the current cluster size, all nodes will be placed on on-demand instances. If this value is less than the current cluster size, `first_on_demand` nodes will be placed on on-demand instances and the remainder will be placed on `availability` instances. Note that this value does not affect cluster size and cannot currently be mutated over the lifetime of a cluster. - -- - `log_analytics_info` - - Map - - Defines values necessary to configure and run Azure Log Analytics agent. See [\_](#pipelinesnameclustersazure_attributeslog_analytics_info). - -- - `spot_bid_max_price` - - Any - - The max bid price to be used for Azure spot instances. The Max price for the bid cannot be higher than the on-demand price of the instance. If not specified, the default value is -1, which specifies that the instance cannot be evicted on the basis of price, and only on the basis of availability. Further, the value should be > 0 or -1.
- -::: - - -### pipelines._name_.clusters.azure_attributes.log_analytics_info - -**`Type: Map`** - -Defines values necessary to configure and run Azure Log Analytics agent - - - -:::list-table - -- - Key - - Type - - Description - -- - `log_analytics_primary_key` - - String - - The primary key for the Azure Log Analytics agent configuration - -- - `log_analytics_workspace_id` - - String - - The workspace ID for the Azure Log Analytics agent configuration - -::: - - -### pipelines._name_.clusters.cluster_log_conf - -**`Type: Map`** - -The configuration for delivering spark logs to a long-term storage destination. -Only dbfs destinations are supported. Only one destination can be specified -for one cluster. If the conf is given, the logs will be delivered to the destination every -`5 mins`. The destination of driver logs is `$destination/$clusterId/driver`, while -the destination of executor logs is `$destination/$clusterId/executor`. - - - -:::list-table - -- - Key - - Type - - Description - -- - `dbfs` - - Map - - destination needs to be provided. e.g. `{ "dbfs" : { "destination" : "dbfs:/home/cluster_log" } }`. See [\_](#pipelinesnameclusterscluster_log_confdbfs). - -- - `s3` - - Map - - destination and either the region or endpoint need to be provided. e.g. `{ "s3": { "destination" : "s3://cluster_log_bucket/prefix", "region" : "us-west-2" } }` Cluster iam role is used to access s3, please make sure the cluster iam role in `instance_profile_arn` has permission to write data to the s3 destination. See [\_](#pipelinesnameclusterscluster_log_confs3). - -- - `volumes` - - Map - - destination needs to be provided, e.g. `{ "volumes": { "destination": "/Volumes/catalog/schema/volume/cluster_log" } }`. See [\_](#pipelinesnameclusterscluster_log_confvolumes). - -::: - - -### pipelines._name_.clusters.cluster_log_conf.dbfs - -**`Type: Map`** - -destination needs to be provided. e.g. 
-`{ "dbfs" : { "destination" : "dbfs:/home/cluster_log" } }` - - - -:::list-table - -- - Key - - Type - - Description - -- - `destination` - - String - - dbfs destination, e.g. `dbfs:/my/path` - -::: - - -### pipelines._name_.clusters.cluster_log_conf.s3 - -**`Type: Map`** - -destination and either the region or endpoint need to be provided. e.g. -`{ "s3": { "destination" : "s3://cluster_log_bucket/prefix", "region" : "us-west-2" } }` -Cluster iam role is used to access s3, please make sure the cluster iam role in -`instance_profile_arn` has permission to write data to the s3 destination. - - - -:::list-table - -- - Key - - Type - - Description - -- - `canned_acl` - - String - - (Optional) Set canned access control list for the logs, e.g. `bucket-owner-full-control`. If `canned_acl` is set, please make sure the cluster iam role has `s3:PutObjectAcl` permission on the destination bucket and prefix. The full list of possible canned acl can be found at http://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl. Please also note that by default only the object owner gets full controls. If you are using cross account role for writing data, you may want to set `bucket-owner-full-control` to make bucket owner able to read the logs. - -- - `destination` - - String - - S3 destination, e.g. `s3://my-bucket/some-prefix` Note that logs will be delivered using cluster iam role, please make sure you set cluster iam role and the role has write access to the destination. Please also note that you cannot use AWS keys to deliver logs. - -- - `enable_encryption` - - Boolean - - (Optional) Flag to enable server side encryption, `false` by default. - -- - `encryption_type` - - String - - (Optional) The encryption type, it could be `sse-s3` or `sse-kms`. It will be used only when encryption is enabled and the default type is `sse-s3`. - -- - `endpoint` - - String - - S3 endpoint, e.g. `https://s3-us-west-2.amazonaws.com`. Either region or endpoint needs to be set.
If both are set, endpoint will be used. - -- - `kms_key` - - String - - (Optional) Kms key which will be used if encryption is enabled and encryption type is set to `sse-kms`. - -- - `region` - - String - - S3 region, e.g. `us-west-2`. Either region or endpoint needs to be set. If both are set, endpoint will be used. - -::: - - -### pipelines._name_.clusters.cluster_log_conf.volumes - -**`Type: Map`** - -destination needs to be provided, e.g. -`{ "volumes": { "destination": "/Volumes/catalog/schema/volume/cluster_log" } }` - - - -:::list-table - -- - Key - - Type - - Description - -- - `destination` - - String - - UC Volumes destination, e.g. `/Volumes/catalog/schema/vol1/init-scripts/setup-datadog.sh` or `dbfs:/Volumes/catalog/schema/vol1/init-scripts/setup-datadog.sh` - -::: - - -### pipelines._name_.clusters.gcp_attributes - -**`Type: Map`** - -Attributes related to clusters running on Google Cloud Platform. -If not specified at cluster creation, a set of default values will be used. - - - -:::list-table - -- - Key - - Type - - Description - -- - `availability` - - String - - This field determines whether the instance pool will contain preemptible VMs, on-demand VMs, or preemptible VMs with a fallback to on-demand VMs if the former is unavailable. - -- - `boot_disk_size` - - Integer - - Boot disk size in GB - -- - `first_on_demand` - - Integer - - The first `first_on_demand` nodes of the cluster will be placed on on-demand instances. This value should be greater than 0, to make sure the cluster driver node is placed on an on-demand instance. If this value is greater than or equal to the current cluster size, all nodes will be placed on on-demand instances. If this value is less than the current cluster size, `first_on_demand` nodes will be placed on on-demand instances and the remainder will be placed on `availability` instances. Note that this value does not affect cluster size and cannot currently be mutated over the lifetime of a cluster. 
- -- - `google_service_account` - - String - - If provided, the cluster will impersonate the google service account when accessing gcloud services (like GCS). The google service account must have previously been added to the Databricks environment by an account administrator. - -- - `local_ssd_count` - - Integer - - If provided, each node (workers and driver) in the cluster will have this number of local SSDs attached. Each local SSD is 375GB in size. Refer to [GCP documentation](https://cloud.google.com/compute/docs/disks/local-ssd#choose_number_local_ssds) for the supported number of local SSDs for each instance type. - -- - `use_preemptible_executors` - - Boolean - - This field is deprecated - -- - `zone_id` - - String - - Identifier for the availability zone in which the cluster resides. This can be one of the following: - "HA" => High availability, spread nodes across availability zones for a Databricks deployment region [default]. - "AUTO" => Databricks picks an availability zone to schedule the cluster on. - A GCP availability zone => Pick One of the available zones for (machine type + region) from https://cloud.google.com/compute/docs/regions-zones. - -::: - - -### pipelines._name_.clusters.init_scripts - -**`Type: Sequence`** - -The configuration for storing init scripts. Any number of destinations can be specified. The scripts are executed sequentially in the order provided. If `cluster_log_conf` is specified, init script logs are sent to `//init_scripts`. - - - -:::list-table - -- - Key - - Type - - Description - -- - `abfss` - - Map - - Contains the Azure Data Lake Storage destination path. See [\_](#pipelinesnameclustersinit_scriptsabfss). - -- - `dbfs` - - Map - - This field is deprecated - -- - `file` - - Map - - destination needs to be provided, e.g. `{ "file": { "destination": "file:/my/local/file.sh" } }`. See [\_](#pipelinesnameclustersinit_scriptsfile). - -- - `gcs` - - Map - - destination needs to be provided, e.g. 
`{ "gcs": { "destination": "gs://my-bucket/file.sh" } }`. See [\_](#pipelinesnameclustersinit_scriptsgcs). - -- - `s3` - - Map - - destination and either the region or endpoint need to be provided. e.g. `{ \"s3\": { \"destination\": \"s3://cluster_log_bucket/prefix\", \"region\": \"us-west-2\" } }` Cluster iam role is used to access s3, please make sure the cluster iam role in `instance_profile_arn` has permission to write data to the s3 destination. See [\_](#pipelinesnameclustersinit_scriptss3). - -- - `volumes` - - Map - - destination needs to be provided. e.g. `{ \"volumes\" : { \"destination\" : \"/Volumes/my-init.sh\" } }`. See [\_](#pipelinesnameclustersinit_scriptsvolumes). - -- - `workspace` - - Map - - destination needs to be provided, e.g. `{ "workspace": { "destination": "/cluster-init-scripts/setup-datadog.sh" } }`. See [\_](#pipelinesnameclustersinit_scriptsworkspace). - -::: - - -### pipelines._name_.clusters.init_scripts.abfss - -**`Type: Map`** - -Contains the Azure Data Lake Storage destination path - - - -:::list-table - -- - Key - - Type - - Description - -- - `destination` - - String - - abfss destination, e.g. `abfss://@.dfs.core.windows.net/`. - -::: - - -### pipelines._name_.clusters.init_scripts.file - -**`Type: Map`** - -destination needs to be provided, e.g. -`{ "file": { "destination": "file:/my/local/file.sh" } }` - - - -:::list-table - -- - Key - - Type - - Description - -- - `destination` - - String - - local file destination, e.g. `file:/my/local/file.sh` - -::: - - -### pipelines._name_.clusters.init_scripts.gcs - -**`Type: Map`** - -destination needs to be provided, e.g. -`{ "gcs": { "destination": "gs://my-bucket/file.sh" } }` - - - -:::list-table - -- - Key - - Type - - Description - -- - `destination` - - String - - GCS destination/URI, e.g. `gs://my-bucket/some-prefix` - -::: - - -### pipelines._name_.clusters.init_scripts.s3 - -**`Type: Map`** - -destination and either the region or endpoint need to be provided. e.g. 
-`{ "s3": { "destination": "s3://cluster_log_bucket/prefix", "region": "us-west-2" } }` -Cluster iam role is used to access s3, please make sure the cluster iam role in -`instance_profile_arn` has permission to write data to the s3 destination. - - - -:::list-table - -- - Key - - Type - - Description - -- - `canned_acl` - - String - - (Optional) Set canned access control list for the logs, e.g. `bucket-owner-full-control`. If `canned_acl` is set, please make sure the cluster iam role has `s3:PutObjectAcl` permission on the destination bucket and prefix. The full list of possible canned acl can be found at http://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl. Please also note that by default only the object owner gets full controls. If you are using cross account role for writing data, you may want to set `bucket-owner-full-control` to make bucket owner able to read the logs. - -- - `destination` - - String - - S3 destination, e.g. `s3://my-bucket/some-prefix` Note that logs will be delivered using cluster iam role, please make sure you set cluster iam role and the role has write access to the destination. Please also note that you cannot use AWS keys to deliver logs. - -- - `enable_encryption` - - Boolean - - (Optional) Flag to enable server side encryption, `false` by default. - -- - `encryption_type` - - String - - (Optional) The encryption type, it could be `sse-s3` or `sse-kms`. It will be used only when encryption is enabled and the default type is `sse-s3`. - -- - `endpoint` - - String - - S3 endpoint, e.g. `https://s3-us-west-2.amazonaws.com`. Either region or endpoint needs to be set. If both are set, endpoint will be used. - -- - `kms_key` - - String - - (Optional) Kms key which will be used if encryption is enabled and encryption type is set to `sse-kms`. - -- - `region` - - String - - S3 region, e.g. `us-west-2`. Either region or endpoint needs to be set. If both are set, endpoint will be used.
- -::: - - -### pipelines._name_.clusters.init_scripts.volumes - -**`Type: Map`** - -destination needs to be provided. e.g. -`{ "volumes" : { "destination" : "/Volumes/my-init.sh" } }` - - - -:::list-table - -- - Key - - Type - - Description - -- - `destination` - - String - - UC Volumes destination, e.g. `/Volumes/catalog/schema/vol1/init-scripts/setup-datadog.sh` or `dbfs:/Volumes/catalog/schema/vol1/init-scripts/setup-datadog.sh` - -::: - - -### pipelines._name_.clusters.init_scripts.workspace - -**`Type: Map`** - -destination needs to be provided, e.g. -`{ "workspace": { "destination": "/cluster-init-scripts/setup-datadog.sh" } }` - - - -:::list-table - -- - Key - - Type - - Description - -- - `destination` - - String - - wsfs destination, e.g. `workspace:/cluster-init-scripts/setup-datadog.sh` - -::: - - -### pipelines._name_.deployment - -**`Type: Map`** - -Deployment type of this pipeline. - - - -:::list-table - -- - Key - - Type - - Description - -- - `kind` - - String - - The deployment method that manages the pipeline. - -- - `metadata_file_path` - - String - - The path to the file containing metadata about the deployment. - -::: - - -### pipelines._name_.environment - -**`Type: Map`** - -Environment specification for this pipeline used to install dependencies. - - - -:::list-table - -- - Key - - Type - - Description - -- - `dependencies` - - Sequence - - List of pip dependencies, as supported by the version of pip in this environment. Each dependency is a pip requirement file line https://pip.pypa.io/en/stable/reference/requirements-file-format/ Allowed dependency could be `<requirement specifier>`, `<archive url/path>`, `<local project path>` (WSFS or Volumes in Databricks), `<vcs project url>`. - -::: - - -### pipelines._name_.event_log - -**`Type: Map`** - -Event log configuration for this pipeline - - - -:::list-table - -- - Key - - Type - - Description - -- - `catalog` - - String - - The UC catalog the event log is published under. - -- - `name` - - String - - The name the event log is published to in UC.
- -- - `schema` - - String - - The UC schema the event log is published under. - -::: - - -### pipelines._name_.filters - -**`Type: Map`** - -Filters on which Pipeline packages to include in the deployed graph. - - - -:::list-table - -- - Key - - Type - - Description - -- - `exclude` - - Sequence - - Paths to exclude. - -- - `include` - - Sequence - - Paths to include. - -::: - - -### pipelines._name_.ingestion_definition - -**`Type: Map`** - -The configuration for a managed ingestion pipeline. These settings cannot be used with the 'libraries', 'schema', 'target', or 'catalog' settings. - - - -:::list-table - -- - Key - - Type - - Description - -- - `connection_name` - - String - - Immutable. The Unity Catalog connection that this ingestion pipeline uses to communicate with the source. This is used with connectors for applications like Salesforce, Workday, and so on. - -- - `ingestion_gateway_id` - - String - - Immutable. Identifier for the gateway that is used by this ingestion pipeline to communicate with the source database. This is used with connectors to databases like SQL Server. - -- - `objects` - - Sequence - - Required. Settings specifying tables to replicate and the destination for the replicated tables. See [\_](#pipelinesnameingestion_definitionobjects). - -- - `table_configuration` - - Map - - Configuration settings to control the ingestion of tables. These settings are applied to all tables in the pipeline. See [\_](#pipelinesnameingestion_definitiontable_configuration). - -::: - - -### pipelines._name_.ingestion_definition.objects - -**`Type: Sequence`** - -Required. Settings specifying tables to replicate and the destination for the replicated tables. - - - -:::list-table - -- - Key - - Type - - Description - -- - `report` - - Map - - Select a specific source report. See [\_](#pipelinesnameingestion_definitionobjectsreport). - -- - `schema` - - Map - - Select all tables from a specific source schema. 
See [\_](#pipelinesnameingestion_definitionobjectsschema). - -- - `table` - - Map - - Select a specific source table. See [\_](#pipelinesnameingestion_definitionobjectstable). - -::: - - -### pipelines._name_.ingestion_definition.objects.report - -**`Type: Map`** - -Select a specific source report. - - - -:::list-table - -- - Key - - Type - - Description - -- - `destination_catalog` - - String - - Required. Destination catalog to store table. - -- - `destination_schema` - - String - - Required. Destination schema to store table. - -- - `destination_table` - - String - - Required. Destination table name. The pipeline fails if a table with that name already exists. - -- - `source_url` - - String - - Required. Report URL in the source system. - -- - `table_configuration` - - Map - - Configuration settings to control the ingestion of tables. These settings override the table_configuration defined in the IngestionPipelineDefinition object. See [\_](#pipelinesnameingestion_definitionobjectsreporttable_configuration). - -::: - - -### pipelines._name_.ingestion_definition.objects.report.table_configuration - -**`Type: Map`** - -Configuration settings to control the ingestion of tables. These settings override the table_configuration defined in the IngestionPipelineDefinition object. - - - -:::list-table - -- - Key - - Type - - Description - -- - `exclude_columns` - - Sequence - - A list of column names to be excluded for the ingestion. When not specified, include_columns fully controls what columns to be ingested. When specified, all other columns including future ones will be automatically included for ingestion. This field is mutually exclusive with `include_columns`. - -- - `include_columns` - - Sequence - - A list of column names to be included for the ingestion. When not specified, all columns except ones in exclude_columns will be included. Future columns will be automatically included.
When specified, all other future columns will be automatically excluded from ingestion. This field in mutually exclusive with `exclude_columns`. - -- - `primary_keys` - - Sequence - - The primary key of the table used to apply changes. - -- - `sequence_by` - - Sequence - - The column names specifying the logical order of events in the source data. Delta Live Tables uses this sequencing to handle change events that arrive out of order. - -::: - - -### pipelines._name_.ingestion_definition.objects.schema - -**`Type: Map`** - -Select all tables from a specific source schema. - - - -:::list-table - -- - Key - - Type - - Description - -- - `destination_catalog` - - String - - Required. Destination catalog to store tables. - -- - `destination_schema` - - String - - Required. Destination schema to store tables in. Tables with the same name as the source tables are created in this destination schema. The pipeline fails If a table with the same name already exists. - -- - `source_catalog` - - String - - The source catalog name. Might be optional depending on the type of source. - -- - `source_schema` - - String - - Required. Schema name in the source database. - -- - `table_configuration` - - Map - - Configuration settings to control the ingestion of tables. These settings are applied to all tables in this schema and override the table_configuration defined in the IngestionPipelineDefinition object. See [\_](#pipelinesnameingestion_definitionobjectsschematable_configuration). - -::: - - -### pipelines._name_.ingestion_definition.objects.schema.table_configuration - -**`Type: Map`** - -Configuration settings to control the ingestion of tables. These settings are applied to all tables in this schema and override the table_configuration defined in the IngestionPipelineDefinition object. - - - -:::list-table - -- - Key - - Type - - Description - -- - `exclude_columns` - - Sequence - - A list of column names to be excluded for the ingestion. 
When not specified, include_columns fully controls what columns to be ingested. When specified, all other columns including future ones will be automatically included for ingestion. This field is mutually exclusive with `include_columns`. - -- - `include_columns` - - Sequence - - A list of column names to be included for the ingestion. When not specified, all columns except ones in exclude_columns will be included. Future columns will be automatically included. When specified, all other future columns will be automatically excluded from ingestion. This field is mutually exclusive with `exclude_columns`. - -- - `primary_keys` - - Sequence - - The primary key of the table used to apply changes. - -- - `sequence_by` - - Sequence - - The column names specifying the logical order of events in the source data. Delta Live Tables uses this sequencing to handle change events that arrive out of order. - -::: - - -### pipelines._name_.ingestion_definition.objects.table - -**`Type: Map`** - -Select a specific source table. - - - -:::list-table - -- - Key - - Type - - Description - -- - `destination_catalog` - - String - - Required. Destination catalog to store table. - -- - `destination_schema` - - String - - Required. Destination schema to store table. - -- - `destination_table` - - String - - Optional. Destination table name. The pipeline fails if a table with that name already exists. If not set, the source table name is used. - -- - `source_catalog` - - String - - Source catalog name. Might be optional depending on the type of source. - -- - `source_schema` - - String - - Schema name in the source database. Might be optional depending on the type of source. - -- - `source_table` - - String - - Required. Table name in the source database. - -- - `table_configuration` - - Map - - Configuration settings to control the ingestion of tables. These settings override the table_configuration defined in the IngestionPipelineDefinition object and the SchemaSpec.
See [\_](#pipelinesnameingestion_definitionobjectstabletable_configuration). - -::: - - -### pipelines._name_.ingestion_definition.objects.table.table_configuration - -**`Type: Map`** - -Configuration settings to control the ingestion of tables. These settings override the table_configuration defined in the IngestionPipelineDefinition object and the SchemaSpec. - - - -:::list-table - -- - Key - - Type - - Description - -- - `exclude_columns` - - Sequence - - A list of column names to be excluded for the ingestion. When not specified, include_columns fully controls what columns to be ingested. When specified, all other columns including future ones will be automatically included for ingestion. This field in mutually exclusive with `include_columns`. - -- - `include_columns` - - Sequence - - A list of column names to be included for the ingestion. When not specified, all columns except ones in exclude_columns will be included. Future columns will be automatically included. When specified, all other future columns will be automatically excluded from ingestion. This field in mutually exclusive with `exclude_columns`. - -- - `primary_keys` - - Sequence - - The primary key of the table used to apply changes. - -- - `sequence_by` - - Sequence - - The column names specifying the logical order of events in the source data. Delta Live Tables uses this sequencing to handle change events that arrive out of order. - -::: - - -### pipelines._name_.ingestion_definition.table_configuration - -**`Type: Map`** - -Configuration settings to control the ingestion of tables. These settings are applied to all tables in the pipeline. - - - -:::list-table - -- - Key - - Type - - Description - -- - `exclude_columns` - - Sequence - - A list of column names to be excluded for the ingestion. When not specified, include_columns fully controls what columns to be ingested. When specified, all other columns including future ones will be automatically included for ingestion. 
This field in mutually exclusive with `include_columns`. - -- - `include_columns` - - Sequence - - A list of column names to be included for the ingestion. When not specified, all columns except ones in exclude_columns will be included. Future columns will be automatically included. When specified, all other future columns will be automatically excluded from ingestion. This field in mutually exclusive with `exclude_columns`. - -- - `primary_keys` - - Sequence - - The primary key of the table used to apply changes. - -- - `sequence_by` - - Sequence - - The column names specifying the logical order of events in the source data. Delta Live Tables uses this sequencing to handle change events that arrive out of order. - -::: - - -### pipelines._name_.libraries - -**`Type: Sequence`** - -Libraries or code needed by this deployment. - - - -:::list-table - -- - Key - - Type - - Description - -- - `file` - - Map - - The path to a file that defines a pipeline and is stored in the Databricks Repos. See [\_](#pipelinesnamelibrariesfile). - -- - `glob` - - Map - - The unified field to include source codes. Each entry can be a notebook path, a file path, or a folder path that ends `/**`. This field cannot be used together with `notebook` or `file`. See [\_](#pipelinesnamelibrariesglob). - -- - `notebook` - - Map - - The path to a notebook that defines a pipeline and is stored in the Databricks workspace. See [\_](#pipelinesnamelibrariesnotebook). - -- - `whl` - - String - - This field is deprecated - -::: - - -### pipelines._name_.libraries.file - -**`Type: Map`** - -The path to a file that defines a pipeline and is stored in the Databricks Repos. - - - -:::list-table - -- - Key - - Type - - Description - -- - `path` - - String - - The absolute path of the source code. - -::: - - -### pipelines._name_.libraries.glob - -**`Type: Map`** - -The unified field to include source codes. -Each entry can be a notebook path, a file path, or a folder path that ends `/**`. 
-This field cannot be used together with `notebook` or `file`. - - - -:::list-table - -- - Key - - Type - - Description - -- - `include` - - String - - The source code to include for pipelines - -::: - - -### pipelines._name_.libraries.notebook - -**`Type: Map`** - -The path to a notebook that defines a pipeline and is stored in the Databricks workspace. - - - -:::list-table - -- - Key - - Type - - Description - -- - `path` - - String - - The absolute path of the source code. - -::: - - -### pipelines._name_.lifecycle - -**`Type: Map`** - -Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. - - - -:::list-table - -- - Key - - Type - - Description - -- - `prevent_destroy` - - Boolean - - Lifecycle setting to prevent the resource from being destroyed. - -::: - - -### pipelines._name_.notifications - -**`Type: Sequence`** - -List of notification settings for this pipeline. - - - -:::list-table - -- - Key - - Type - - Description - -- - `alerts` - - Sequence - - A list of alerts that trigger the sending of notifications to the configured destinations. The supported alerts are: * `on-update-success`: A pipeline update completes successfully. * `on-update-failure`: Each time a pipeline update fails. * `on-update-fatal-failure`: A pipeline update fails with a non-retryable (fatal) error. * `on-flow-failure`: A single data flow fails. - -- - `email_recipients` - - Sequence - - A list of email addresses notified when a configured alert is triggered. 
- -::: - - -### pipelines._name_.permissions - -**`Type: Sequence`** - - - - - -:::list-table - -- - Key - - Type - - Description - -- - `group_name` - - String - - - -- - `level` - - String - - - -- - `service_principal_name` - - String - - - -- - `user_name` - - String - - - -::: - - -## quality_monitors - -**`Type: Map`** - -The quality_monitor resource allows you to define a Unity Catalog [table monitor](/api/workspace/qualitymonitors/create). For information about monitors, see [_](/machine-learning/model-serving/monitor-diagnose-endpoints.md). - -```yaml -quality_monitors: - : - : -``` - - -:::list-table - -- - Key - - Type - - Description - -- - `assets_dir` - - String - - [Create:REQ Update:IGN] Field for specifying the absolute path to a custom directory to store data-monitoring assets. Normally prepopulated to a default user location via UI and Python APIs. - -- - `baseline_table_name` - - String - - [Create:OPT Update:OPT] Baseline table name. Baseline data is used to compute drift from the data in the monitored `table_name`. The baseline table and the monitored table shall have the same schema. - -- - `custom_metrics` - - Sequence - - [Create:OPT Update:OPT] Custom metrics. See [\_](#quality_monitorsnamecustom_metrics). - -- - `inference_log` - - Map - - See [\_](#quality_monitorsnameinference_log). - -- - `latest_monitor_failure_msg` - - String - - [Create:ERR Update:IGN] The latest error message for a monitor failure. - -- - `lifecycle` - - Map - - Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. See [\_](#quality_monitorsnamelifecycle). - -- - `notifications` - - Map - - [Create:OPT Update:OPT] Field for specifying notification settings. See [\_](#quality_monitorsnamenotifications). - -- - `output_schema_name` - - String - - [Create:REQ Update:REQ] Schema where output tables are created. 
Needs to be in 2-level format {catalog}.{schema} - -- - `schedule` - - Map - - [Create:OPT Update:OPT] The monitor schedule. See [\_](#quality_monitorsnameschedule). - -- - `skip_builtin_dashboard` - - Boolean - - Whether to skip creating a default dashboard summarizing data quality metrics. - -- - `slicing_exprs` - - Sequence - - [Create:OPT Update:OPT] List of column expressions to slice data with for targeted analysis. The data is grouped by each expression independently, resulting in a separate slice for each predicate and its complements. For example `slicing_exprs=[“col_1”, “col_2 > 10”]` will generate the following slices: two slices for `col_2 > 10` (True and False), and one slice per unique value in `col1`. For high-cardinality columns, only the top 100 unique values by frequency will generate slices. - -- - `snapshot` - - Map - - Configuration for monitoring snapshot tables. - -- - `table_name` - - String - - - -- - `time_series` - - Map - - Configuration for monitoring time series tables. See [\_](#quality_monitorsnametime_series). - -- - `warehouse_id` - - String - - Optional argument to specify the warehouse for dashboard creation. If not specified, the first running warehouse will be used. - -::: - - -**Example** - -The following example defines a quality monitor: - -```yaml -resources: - quality_monitors: - my_quality_monitor: - table_name: dev.mlops_schema.predictions - output_schema_name: ${bundle.target}.mlops_schema - assets_dir: /Users/${workspace.current_user.userName}/databricks_lakehouse_monitoring - inference_log: - granularities: [1 day] - model_id_col: model_id - prediction_col: prediction - label_col: price - problem_type: PROBLEM_TYPE_REGRESSION - timestamp_col: timestamp - schedule: - quartz_cron_expression: 0 0 8 * * ? # Run Every day at 8am - timezone_id: UTC -``` - -### quality_monitors._name_.custom_metrics - -**`Type: Sequence`** - -[Create:OPT Update:OPT] Custom metrics. 
- - - -:::list-table - -- - Key - - Type - - Description - -- - `definition` - - String - - Jinja template for a SQL expression that specifies how to compute the metric. See [create metric definition](https://docs.databricks.com/en/lakehouse-monitoring/custom-metrics.html#create-definition). - -- - `input_columns` - - Sequence - - A list of column names in the input table the metric should be computed for. Can use ``":table"`` to indicate that the metric needs information from multiple columns. - -- - `name` - - String - - Name of the metric in the output tables. - -- - `output_data_type` - - String - - The output type of the custom metric. - -- - `type` - - String - - Can only be one of ``"CUSTOM_METRIC_TYPE_AGGREGATE"``, ``"CUSTOM_METRIC_TYPE_DERIVED"``, or ``"CUSTOM_METRIC_TYPE_DRIFT"``. The ``"CUSTOM_METRIC_TYPE_AGGREGATE"`` and ``"CUSTOM_METRIC_TYPE_DERIVED"`` metrics are computed on a single table, whereas the ``"CUSTOM_METRIC_TYPE_DRIFT"`` compare metrics across baseline and input table, or across the two consecutive time windows. - CUSTOM_METRIC_TYPE_AGGREGATE: only depend on the existing columns in your table - CUSTOM_METRIC_TYPE_DERIVED: depend on previously computed aggregate metrics - CUSTOM_METRIC_TYPE_DRIFT: depend on previously computed aggregate or derived metrics - -::: - - -### quality_monitors._name_.inference_log - -**`Type: Map`** - - - - - -:::list-table - -- - Key - - Type - - Description - -- - `granularities` - - Sequence - - Granularities for aggregating data into time windows based on their timestamp. Valid values are 5 minutes, 30 minutes, 1 hour, 1 day, n weeks, 1 month, or 1 year. - -- - `label_col` - - String - - Column for the label. - -- - `model_id_col` - - String - - Column for the model identifier. - -- - `prediction_col` - - String - - Column for the prediction. - -- - `prediction_proba_col` - - String - - Column for prediction probabilities - -- - `problem_type` - - String - - Problem type the model aims to solve. 
- -- - `timestamp_col` - - String - - Column for the timestamp. - -::: - - -### quality_monitors._name_.lifecycle - -**`Type: Map`** - -Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. - - - -:::list-table - -- - Key - - Type - - Description - -- - `prevent_destroy` - - Boolean - - Lifecycle setting to prevent the resource from being destroyed. - -::: - - -### quality_monitors._name_.notifications - -**`Type: Map`** - -[Create:OPT Update:OPT] Field for specifying notification settings. - - - -:::list-table - -- - Key - - Type - - Description - -- - `on_failure` - - Map - - Destinations to send notifications on failure/timeout. See [\_](#quality_monitorsnamenotificationson_failure). - -::: - - -### quality_monitors._name_.notifications.on_failure - -**`Type: Map`** - -Destinations to send notifications on failure/timeout. - - - -:::list-table - -- - Key - - Type - - Description - -- - `email_addresses` - - Sequence - - The list of email addresses to send the notification to. A maximum of 5 email addresses is supported. - -::: - - -### quality_monitors._name_.schedule - -**`Type: Map`** - -[Create:OPT Update:OPT] The monitor schedule. - - - -:::list-table - -- - Key - - Type - - Description - -- - `pause_status` - - String - - Read only field that indicates whether a schedule is paused or not. - -- - `quartz_cron_expression` - - String - - The expression that determines when to run the monitor. See [examples](https://www.quartz-scheduler.org/documentation/quartz-2.3.0/tutorials/crontrigger.html). - -- - `timezone_id` - - String - - The timezone id (e.g., ``PST``) in which to evaluate the quartz expression. - -::: - - -### quality_monitors._name_.snapshot - -**`Type: Map`** - -Configuration for monitoring snapshot tables. - - -### quality_monitors._name_.time_series - -**`Type: Map`** - -Configuration for monitoring time series tables. 
- - - -:::list-table - -- - Key - - Type - - Description - -- - `granularities` - - Sequence - - Granularities for aggregating data into time windows based on their timestamp. Valid values are 5 minutes, 30 minutes, 1 hour, 1 day, n weeks, 1 month, or 1 year. - -- - `timestamp_col` - - String - - Column for the timestamp. - -::: - - -## registered_models - -**`Type: Map`** - -The registered model resource allows you to define models in Unity Catalog. For information about Unity Catalog [registered models](/api/workspace/registeredmodels/create), see [_](/machine-learning/manage-model-lifecycle/index.md). - -```yaml -registered_models: - : - : -``` - - -:::list-table - -- - Key - - Type - - Description - -- - `catalog_name` - - String - - The name of the catalog where the schema and the registered model reside - -- - `comment` - - String - - The comment attached to the registered model - -- - `grants` - - Sequence - - See [\_](#registered_modelsnamegrants). - -- - `lifecycle` - - Map - - Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. See [\_](#registered_modelsnamelifecycle). 
- -- - `name` - - String - - The name of the registered model - -- - `schema_name` - - String - - The name of the schema where the registered model resides - -- - `storage_location` - - String - - The storage location on the cloud under which model version data files are stored - -::: - - -**Example** - -The following example defines a registered model in Unity Catalog: - -```yaml -resources: - registered_models: - model: - name: my_model - catalog_name: ${bundle.target} - schema_name: mlops_schema - comment: Registered model in Unity Catalog for ${bundle.target} deployment target - grants: - - privileges: - - EXECUTE - principal: account users -``` - -### registered_models._name_.grants - -**`Type: Sequence`** - - - - - -:::list-table - -- - Key - - Type - - Description - -- - `principal` - - String - - The name of the principal that will be granted privileges - -- - `privileges` - - Sequence - - The privileges to grant to the specified entity - -::: - - -### registered_models._name_.lifecycle - -**`Type: Map`** - -Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. - - - -:::list-table - -- - Key - - Type - - Description - -- - `prevent_destroy` - - Boolean - - Lifecycle setting to prevent the resource from being destroyed. - -::: - - -## schemas - -**`Type: Map`** - -The schema resource type allows you to define Unity Catalog [schemas](/api/workspace/schemas/create) for tables and other assets in your workflows and pipelines created as part of a bundle. A schema, different from other resource types, has the following limitations: - -- The owner of a schema resource is always the deployment user, and cannot be changed. If `run_as` is specified in the bundle, it will be ignored by operations on the schema. -- Only fields supported by the corresponding [Schemas object create API](/api/workspace/schemas/create) are available for the schema resource. 
For example, `enable_predictive_optimization` is not supported as it is only available on the [update API](/api/workspace/schemas/update). - -```yaml -schemas: - : - : -``` - - -:::list-table - -- - Key - - Type - - Description - -- - `catalog_name` - - String - - Name of parent catalog. - -- - `comment` - - String - - User-provided free-form text description. - -- - `grants` - - Sequence - - See [\_](#schemasnamegrants). - -- - `lifecycle` - - Map - - Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. See [\_](#schemasnamelifecycle). - -- - `name` - - String - - Name of schema, relative to parent catalog. - -- - `properties` - - Map - - - -- - `storage_root` - - String - - Storage root URL for managed tables within schema. - -::: - - -**Example** - -The following example defines a pipeline with the resource key `my_pipeline` that creates a Unity Catalog schema with the key `my_schema` as the target: - -```yaml -resources: - pipelines: - my_pipeline: - name: test-pipeline-{{.unique_id}} - libraries: - - notebook: - path: ./nb.sql - development: true - catalog: main - target: ${resources.schemas.my_schema.id} - - schemas: - my_schema: - name: test-schema-{{.unique_id}} - catalog_name: main - comment: This schema was created by DABs. -``` - -A top-level grants mapping is not supported by Databricks Asset Bundles, so if you want to set grants for a schema, define the grants for the schema within the `schemas` mapping. For more information about grants, see [_](/data-governance/unity-catalog/manage-privileges/index.md#grant). 
- -The following example defines a Unity Catalog schema with grants: - -```yaml -resources: - schemas: - my_schema: - name: test-schema - grants: - - principal: users - privileges: - - CAN_MANAGE - - principal: my_team - privileges: - - CAN_READ - catalog_name: main - ``` - -### schemas._name_.grants - -**`Type: Sequence`** - - - - - -:::list-table - -- - Key - - Type - - Description - -- - `principal` - - String - - - -- - `privileges` - - Sequence - - - -::: - - -### schemas._name_.grants.privileges - -**`Type: Sequence`** - - - - -### schemas._name_.lifecycle - -**`Type: Map`** - -Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. - - - -:::list-table - -- - Key - - Type - - Description - -- - `prevent_destroy` - - Boolean - - Lifecycle setting to prevent the resource from being destroyed. - -::: - - -## secret_scopes - -**`Type: Map`** - -The secret scope definitions for the bundle, where each key is the name of the secret scope. See [\_](/dev-tools/bundles/resources.md#secret_scopes). - -```yaml -secret_scopes: - : - : -``` - - -:::list-table - -- - Key - - Type - - Description - -- - `backend_type` - - String - - The backend type the scope will be created with. If not specified, will default to `DATABRICKS` - -- - `keyvault_metadata` - - Map - - The metadata for the secret scope if the `backend_type` is `AZURE_KEYVAULT`. See [\_](#secret_scopesnamekeyvault_metadata). - -- - `lifecycle` - - Map - - Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. See [\_](#secret_scopesnamelifecycle). - -- - `name` - - String - - Scope name requested by the user. Scope names are unique. - -- - `permissions` - - Sequence - - The permissions to apply to the secret scope. Permissions are managed via secret scope ACLs. See [\_](#secret_scopesnamepermissions). 
- -::: - - -### secret_scopes._name_.keyvault_metadata - -**`Type: Map`** - -The metadata for the secret scope if the `backend_type` is `AZURE_KEYVAULT` - - - -:::list-table - -- - Key - - Type - - Description - -- - `dns_name` - - String - - The DNS of the KeyVault - -- - `resource_id` - - String - - The resource id of the azure KeyVault that user wants to associate the scope with. - -::: - - -### secret_scopes._name_.lifecycle - -**`Type: Map`** - -Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. - - - -:::list-table - -- - Key - - Type - - Description - -- - `prevent_destroy` - - Boolean - - Lifecycle setting to prevent the resource from being destroyed. - -::: - - -### secret_scopes._name_.permissions - -**`Type: Sequence`** - -The permissions to apply to the secret scope. Permissions are managed via secret scope ACLs. - - - -:::list-table - -- - Key - - Type - - Description - -- - `group_name` - - String - - The name of the group that has the permission set in level. This field translates to a `principal` field in secret scope ACL. - -- - `level` - - String - - The allowed permission for user, group, service principal defined for this permission. - -- - `service_principal_name` - - String - - The application ID of an active service principal. This field translates to a `principal` field in secret scope ACL. - -- - `user_name` - - String - - The name of the user that has the permission set in level. This field translates to a `principal` field in secret scope ACL. - -::: - - -## sql_warehouses - -**`Type: Map`** - -The SQL warehouse definitions for the bundle, where each key is the name of the warehouse. See [\_](/dev-tools/bundles/resources.md#sql_warehouses). 
- -```yaml -sql_warehouses: - : - : -``` - - -:::list-table - -- - Key - - Type - - Description - -- - `auto_stop_mins` - - Integer - - The amount of time in minutes that a SQL warehouse must be idle (i.e., no RUNNING queries) before it is automatically stopped. Supported values: - Must be >= 0 mins for serverless warehouses - Must be == 0 or >= 10 mins for non-serverless warehouses - 0 indicates no autostop. Defaults to 120 mins - -- - `channel` - - Map - - Channel Details. See [\_](#sql_warehousesnamechannel). - -- - `cluster_size` - - String - - Size of the clusters allocated for this warehouse. Increasing the size of a spark cluster allows you to run larger queries on it. If you want to increase the number of concurrent queries, please tune max_num_clusters. Supported values: - 2X-Small - X-Small - Small - Medium - Large - X-Large - 2X-Large - 3X-Large - 4X-Large - -- - `creator_name` - - String - - warehouse creator name - -- - `enable_photon` - - Boolean - - Configures whether the warehouse should use Photon optimized clusters. Defaults to true. - -- - `enable_serverless_compute` - - Boolean - - Configures whether the warehouse should use serverless compute - -- - `instance_profile_arn` - - String - - This field is deprecated - -- - `lifecycle` - - Map - - Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. See [\_](#sql_warehousesnamelifecycle). - -- - `max_num_clusters` - - Integer - - Maximum number of clusters that the autoscaler will create to handle concurrent queries. Supported values: - Must be >= min_num_clusters - Must be <= 30. Defaults to min_clusters if unset. - -- - `min_num_clusters` - - Integer - - Minimum number of available clusters that will be maintained for this SQL warehouse. Increasing this will ensure that a larger number of clusters are always running and therefore may reduce the cold start time for new queries. 
This is similar to reserved vs. revocable cores in a resource manager. Supported values: - Must be > 0 - Must be <= min(max_num_clusters, 30) Defaults to 1 - -- - `name` - - String - - Logical name for the cluster. Supported values: - Must be unique within an org. - Must be less than 100 characters. - -- - `permissions` - - Sequence - - See [\_](#sql_warehousesnamepermissions). - -- - `spot_instance_policy` - - String - - Configurations whether the warehouse should use spot instances. - -- - `tags` - - Map - - A set of key-value pairs that will be tagged on all resources (e.g., AWS instances and EBS volumes) associated with this SQL warehouse. Supported values: - Number of tags < 45. See [\_](#sql_warehousesnametags). - -- - `warehouse_type` - - String - - Warehouse type: `PRO` or `CLASSIC`. If you want to use serverless compute, you must set to `PRO` and also set the field `enable_serverless_compute` to `true`. - -::: - - -### sql_warehouses._name_.channel - -**`Type: Map`** - -Channel Details - - - -:::list-table - -- - Key - - Type - - Description - -- - `dbsql_version` - - String - - - -- - `name` - - String - - - -::: - - -### sql_warehouses._name_.lifecycle - -**`Type: Map`** - -Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. - - - -:::list-table - -- - Key - - Type - - Description - -- - `prevent_destroy` - - Boolean - - Lifecycle setting to prevent the resource from being destroyed. - -::: - - -### sql_warehouses._name_.permissions - -**`Type: Sequence`** - - - - - -:::list-table - -- - Key - - Type - - Description - -- - `group_name` - - String - - - -- - `level` - - String - - - -- - `service_principal_name` - - String - - - -- - `user_name` - - String - - - -::: - - -### sql_warehouses._name_.tags - -**`Type: Map`** - -A set of key-value pairs that will be tagged on all resources (e.g., AWS instances and EBS volumes) associated -with this SQL warehouse. 
- -Supported values: - - Number of tags < 45. - - - -:::list-table - -- - Key - - Type - - Description - -- - `custom_tags` - - Sequence - - See [\_](#sql_warehousesnametagscustom_tags). - -::: - - -### sql_warehouses._name_.tags.custom_tags - -**`Type: Sequence`** - - - - - -:::list-table - -- - Key - - Type - - Description - -- - `key` - - String - - - -- - `value` - - String - - - -::: - - -## synced_database_tables - -**`Type: Map`** - -Next field marker: 14 - -```yaml -synced_database_tables: - : - : -``` - - -:::list-table - -- - Key - - Type - - Description - -- - `database_instance_name` - - String - - - -- - `lifecycle` - - Map - - Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. See [\_](#synced_database_tablesnamelifecycle). - -- - `logical_database_name` - - String - - - -- - `name` - - String - - - -- - `spec` - - Map - - Specification of a synced database table. See [\_](#synced_database_tablesnamespec). - -::: - - -### synced_database_tables._name_.lifecycle - -**`Type: Map`** - -Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. - - - -:::list-table - -- - Key - - Type - - Description - -- - `prevent_destroy` - - Boolean - - Lifecycle setting to prevent the resource from being destroyed. - -::: - - -### synced_database_tables._name_.spec - -**`Type: Map`** - -Specification of a synced database table. - - - -:::list-table - -- - Key - - Type - - Description - -- - `create_database_objects_if_missing` - - Boolean - - If true, the synced table's logical database and schema resources in PG will be created if they do not already exist. - -- - `existing_pipeline_id` - - String - - At most one of existing_pipeline_id and new_pipeline_spec should be defined. 
If existing_pipeline_id is defined, the synced table will be bin packed into the existing pipeline referenced. This avoids creating a new pipeline and allows sharing existing compute. In this case, the scheduling_policy of this synced table must match the scheduling policy of the existing pipeline. - -- - `new_pipeline_spec` - - Map - - At most one of existing_pipeline_id and new_pipeline_spec should be defined. If new_pipeline_spec is defined, a new pipeline is created for this synced table. The location pointed to is used to store intermediate files (checkpoints, event logs etc). The caller must have write permissions to create Delta tables in the specified catalog and schema. Again, note this requires write permissions, whereas the source table only requires read permissions. See [\_](#synced_database_tablesnamespecnew_pipeline_spec). - -- - `primary_key_columns` - - Sequence - - Primary Key columns to be used for data insert/update in the destination. - -- - `scheduling_policy` - - String - - Scheduling policy of the underlying pipeline. - -- - `source_table_full_name` - - String - - Three-part (catalog, schema, table) name of the source Delta table. - -- - `timeseries_key` - - String - - Time series key to deduplicate (tie-break) rows with the same primary key. - -::: - - -### synced_database_tables._name_.spec.new_pipeline_spec - -**`Type: Map`** - -At most one of existing_pipeline_id and new_pipeline_spec should be defined. - -If new_pipeline_spec is defined, a new pipeline is created for this synced table. The location pointed to is used -to store intermediate files (checkpoints, event logs etc). The caller must have write permissions to create Delta -tables in the specified catalog and schema. Again, note this requires write permissions, whereas the source table -only requires read permissions. 
- - - -:::list-table - -- - Key - - Type - - Description - -- - `storage_catalog` - - String - - This field needs to be specified if the destination catalog is a managed postgres catalog. UC catalog for the pipeline to store intermediate files (checkpoints, event logs etc). This needs to be a standard catalog where the user has permissions to create Delta tables. - -- - `storage_schema` - - String - - This field needs to be specified if the destination catalog is a managed postgres catalog. UC schema for the pipeline to store intermediate files (checkpoints, event logs etc). This needs to be in the standard catalog where the user has permissions to create Delta tables. - -::: - - -## volumes - -**`Type: Map`** - -The volume resource type allows you to define and create Unity Catalog [volumes](/api/workspace/volumes/create) as part of a bundle. When deploying a bundle with a volume defined, note that: - -- A volume cannot be referenced in the `artifact_path` for the bundle until it exists in the workspace. Hence, if you want to use Databricks Asset Bundles to create the volume, you must first define the volume in the bundle, deploy it to create the volume, then reference it in the `artifact_path` in subsequent deployments. - -- Volumes in the bundle are not prepended with the `dev_${workspace.current_user.short_name}` prefix when the deployment target has `mode: development` configured. However, you can manually configure this prefix. See [_](/dev-tools/bundles/deployment-modes.md#custom-presets). - -```yaml -volumes: - : - : -``` - - -:::list-table - -- - Key - - Type - - Description - -- - `catalog_name` - - String - - The name of the catalog where the schema and the volume are - -- - `comment` - - String - - The comment attached to the volume - -- - `grants` - - Sequence - - See [\_](#volumesnamegrants). - -- - `lifecycle` - - Map - - Lifecycle is a struct that contains the lifecycle settings for a resource. 
It controls the behavior of the resource when it is deployed or destroyed. See [\_](#volumesnamelifecycle). - -- - `name` - - String - - The name of the volume - -- - `schema_name` - - String - - The name of the schema where the volume is - -- - `storage_location` - - String - - The storage location on the cloud - -- - `volume_type` - - String - - The type of the volume. An external volume is located in the specified external location. A managed volume is located in the default location which is specified by the parent schema, or the parent catalog, or the Metastore. [Learn more](https://docs.databricks.com/aws/en/volumes/managed-vs-external) - -::: - - -**Example** - -The following example creates a Unity Catalog volume with the key `my_volume`: - -```yaml -resources: - volumes: - my_volume: - catalog_name: main - name: my_volume - schema_name: my_schema -``` - -For an example bundle that runs a job that writes to a file in Unity Catalog volume, see the [bundle-examples GitHub repository](https://github.com/databricks/bundle-examples/tree/main/knowledge_base/write_from_job_to_volume). - -### volumes._name_.grants - -**`Type: Sequence`** - - - - - -:::list-table - -- - Key - - Type - - Description - -- - `principal` - - String - - - -- - `privileges` - - Sequence - - - -::: - - -### volumes._name_.grants.privileges - -**`Type: Sequence`** - - - - -### volumes._name_.lifecycle - -**`Type: Map`** - -Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. - - - -:::list-table - -- - Key - - Type - - Description - -- - `prevent_destroy` - - Boolean - - Lifecycle setting to prevent the resource from being destroyed. 
- -::: - \ No newline at end of file diff --git a/bundle/docsgen/refs.go b/bundle/docsgen/refs.go deleted file mode 100644 index 5092dcd664..0000000000 --- a/bundle/docsgen/refs.go +++ /dev/null @@ -1,110 +0,0 @@ -package main - -import ( - "log" - "strings" - - "github.com/databricks/cli/libs/jsonschema" -) - -func isReferenceType(v *jsonschema.Schema, refs map[string]*jsonschema.Schema, ownFields map[string]bool) bool { - if v.Type != "object" && v.Type != "array" { - return false - } - if len(v.Properties) > 0 { - return true - } - if v.Items != nil { - items := resolveRefs(v.Items, refs) - if items != nil && items.Type == "object" { - return true - } - } - props := resolveAdditionalProperties(v) - if !isInOwnFields(props, ownFields) { - return false - } - if props != nil { - propsResolved := resolveRefs(props, refs) - return propsResolved.Type == "object" - } - - return false -} - -func isInOwnFields(node *jsonschema.Schema, ownFields map[string]bool) bool { - if node != nil && node.Reference != nil { - return ownFields[getRefType(node)] - } - return true -} - -func resolveAdditionalProperties(v *jsonschema.Schema) *jsonschema.Schema { - if v.AdditionalProperties == nil { - return nil - } - additionalProps, ok := v.AdditionalProperties.(*jsonschema.Schema) - if !ok { - return nil - } - return additionalProps -} - -func resolveRefs(s *jsonschema.Schema, schemas map[string]*jsonschema.Schema) *jsonschema.Schema { - if s == nil { - return nil - } - - node := s - description := s.Description - markdownDescription := s.MarkdownDescription - examples := getExamples(s.Examples) - deprecated := s.Deprecated - deprecationMessage := s.DeprecationMessage - doNotSuggest := s.DoNotSuggest - - for node.Reference != nil { - ref := getRefType(node) - newNode, ok := schemas[ref] - if !ok { - log.Printf("schema %s not found", ref) - break - } - - if description == "" { - description = newNode.Description - } - if markdownDescription == "" { - markdownDescription = 
newNode.MarkdownDescription - } - if len(examples) == 0 { - examples = getExamples(newNode.Examples) - } - - node = newNode - } - - newNode := *node - newNode.Description = description - newNode.MarkdownDescription = markdownDescription - newNode.Examples = examples - newNode.Deprecated = deprecated - newNode.DeprecationMessage = deprecationMessage - newNode.DoNotSuggest = doNotSuggest - return &newNode -} - -func getExamples(examples any) []string { - typedExamples, ok := examples.([]string) - if !ok { - return []string{} - } - return typedExamples -} - -func getRefType(node *jsonschema.Schema) string { - if node.Reference == nil { - return "" - } - return strings.TrimPrefix(*node.Reference, "#/$defs/") -} diff --git a/bundle/docsgen/renderer.go b/bundle/docsgen/renderer.go deleted file mode 100644 index 5f6c77258b..0000000000 --- a/bundle/docsgen/renderer.go +++ /dev/null @@ -1,51 +0,0 @@ -package main - -import ( - "fmt" - "runtime" - "strings" -) - -type markdownRenderer struct { - nodes []string -} - -func newMardownRenderer() *markdownRenderer { - return &markdownRenderer{} -} - -func (m *markdownRenderer) add(s string) *markdownRenderer { - m.nodes = append(m.nodes, s) - return m -} - -func (m *markdownRenderer) PlainText(s string) *markdownRenderer { - return m.add(s) -} - -func (m *markdownRenderer) LF() *markdownRenderer { - return m.add(" ") -} - -func (m *markdownRenderer) H2(s string) *markdownRenderer { - return m.add("## " + s) -} - -func (m *markdownRenderer) H3(s string) *markdownRenderer { - return m.add("### " + s) -} - -func (m *markdownRenderer) CodeBlocks(lang, s string) *markdownRenderer { - return m.add(fmt.Sprintf("```%s%s%s%s```", lang, lineFeed(), s, lineFeed())) -} - -func (m *markdownRenderer) String() string { - return strings.Join(m.nodes, lineFeed()) -} - -func lineFeed() string { - if runtime.GOOS == "windows" { - return "\r\n" - } - return "\n" -} diff --git a/bundle/docsgen/templates/reference.md 
b/bundle/docsgen/templates/reference.md deleted file mode 100644 index 37aeb57f9e..0000000000 --- a/bundle/docsgen/templates/reference.md +++ /dev/null @@ -1,13 +0,0 @@ ---- -description: 'Configuration reference for databricks.yml' -last_update: - date: {{update_date}} ---- - - - -# Configuration reference - -This article provides reference for keys supported by :re[DABS] configuration (YAML). See [\_](/dev-tools/bundles/index.md). - -For complete bundle examples, see [\_](/dev-tools/bundles/resource-examples.md) and the [bundle-examples GitHub repository](https://github.com/databricks/bundle-examples). diff --git a/bundle/docsgen/templates/resources.md b/bundle/docsgen/templates/resources.md deleted file mode 100644 index aac2c4d912..0000000000 --- a/bundle/docsgen/templates/resources.md +++ /dev/null @@ -1,124 +0,0 @@ ---- -description: 'Learn about resources supported by Databricks Asset Bundles and how to configure them.' -last_update: - date: {{update_date}} ---- - - - -# :re[DABS] resources - -:re[DABS] allows you to specify information about the :re[Databricks] resources used by the bundle in the `resources` mapping in the bundle configuration. See [resources mapping](/dev-tools/bundles/settings.md#resources) and [resources key reference](/dev-tools/bundles/reference.md#resources). - -This article outlines supported resource types for bundles and provides details and an example for each supported type. For additional examples, see [\_](/dev-tools/bundles/resource-examples.md). - -:::tip - -To generate YAML for any existing resource, use the `databricks bundle generate` command. See [\_](/dev-tools/cli/bundle-commands.md#generate). - -::: - -## Supported resources - -The following table lists supported resource types for bundles. Some resources can be created by defining them in a bundle and deploying the bundle, and some resources only support referencing an existing resource to include in the bundle. 
- -Resources are defined using the corresponding [Databricks REST API](/api/workspace/introduction) object’s create operation request payload, where the object’s supported fields, expressed as YAML, are the resource’s supported properties. Links to documentation for each resource’s corresponding payloads are listed in the table. - -:::tip - -The `databricks bundle validate` command returns warnings if unknown resource properties are found in bundle configuration files. - -::: - -::::aws-azure - -:::list-table - -- - Resource - - Create support - - Corresponding REST API object -- - [app](#apps) - - ✓ - - [App object](/api/workspace/apps/create) -- - [cluster](#clusters) - - ✓ - - [Cluster object](/api/workspace/clusters/create) -- - [dashboard](#dashboards) - - - - [Dashboard object](/api/workspace/lakeview/create) -- - [experiment](#experiments) - - ✓ - - [Experiment object](/api/workspace/experiments/createexperiment) -- - [job](#job) - - ✓ - - [Job object](/api/workspace/jobs/create) -- - [model (legacy)](#models) - - ✓ - - [Model (legacy) object](/api/workspace/modelregistry/createmodel) -- - [model_serving_endpoint](#model_serving_endpoints) - - ✓ - - [Model serving endpoint object](/api/workspace/servingendpoints/create) -- - [pipeline](#pipeline) - - ✓ - - [Pipeline object](/api/workspace/pipelines/create) -- - [quality_monitor](#quality_monitors) - - ✓ - - [Quality monitor object](/api/workspace/qualitymonitors/create) -- - [registered_model](#registered_models) (:re[UC]) - - ✓ - - [Registered model object](/api/workspace/registeredmodels/create) -- - [schema](#schemas) (:re[UC]) - - ✓ - - [Schema object](/api/workspace/schemas/create) -- - [volume](#volumes) (:re[UC]) - - ✓ - - [Volume object](/api/workspace/volumes/create) - -::: - -:::: - -::::gcp - -:::list-table - -- - Resource - - Create support - - Corresponding REST API object -- - [cluster](#clusters) - - ✓ - - [Cluster object](/api/workspace/clusters/create) -- - [dashboard](#dashboards) - - - - 
[Dashboard object](/api/workspace/lakeview/create) -- - [experiment](#experiments) - - ✓ - - [Experiment object](/api/workspace/experiments/createexperiment) -- - [job](#jobs) - - ✓ - - [Job object](/api/workspace/jobs/create) -- - [model (legacy)](#models) - - ✓ - - [Model (legacy) object](/api/workspace/modelregistry/createmodel) -- - [model_serving_endpoint](#model_serving_endpoints) - - ✓ - - [Model serving endpoint object](/api/workspace/servingendpoints/create) -- - [pipeline](#pipelines) - - ✓ - - [Pipeline object]](/api/workspace/pipelines/create) -- - [quality_monitor](#quality_monitors) - - ✓ - - [Quality monitor object](/api/workspace/qualitymonitors/create) -- - [registered_model](#registered_models) (:re[UC]) - - ✓ - - [Registered model object](/api/workspace/registeredmodels/create) -- - [schema](#schemas) (:re[UC]) - - ✓ - - [Schema object](/api/workspace/schemas/create) -- - [volume](#volumes) (:re[UC]) - - ✓ - - [Volume object](/api/workspace/volumes/create) - -::: - -:::: diff --git a/bundle/docsgen/testdata/anchors.md b/bundle/docsgen/testdata/anchors.md deleted file mode 100644 index 43f79c975a..0000000000 --- a/bundle/docsgen/testdata/anchors.md +++ /dev/null @@ -1,29 +0,0 @@ -Header - -## some_field - -**`Type: Map`** - -This is a description - - - -:::list-table - -- - Key - - Type - - Description - -- - `my_attribute` - - Map - - Desc with link. See [\_](#some_fieldnamemy_attribute). 
- -::: - - -### some_field._name_.my_attribute - -**`Type: Boolean`** - -Another description - \ No newline at end of file diff --git a/bundle/internal/schema/annotations.go b/bundle/internal/schema/annotations.go index 689d75ae2e..887a0fe846 100644 --- a/bundle/internal/schema/annotations.go +++ b/bundle/internal/schema/annotations.go @@ -3,6 +3,7 @@ package main import ( "bytes" "fmt" + "maps" "os" "reflect" "regexp" @@ -21,23 +22,44 @@ import ( ) type annotationHandler struct { - // Annotations read from all annotation files including all overrides + // Annotations read from the annotations.yml override file parsedAnnotations annotation.File + // OpenAPI parser for reading descriptions directly from the spec + openapi *openapiParser // Missing annotations for fields that are found in config that need to be added to the annotation file missingAnnotations annotation.File + // Path mapping for converting between Go type paths and bundle paths + pathMap *pathMapping } -// Adds annotations to the JSON schema reading from the annotation files. -// More details https://json-schema.org/understanding-json-schema/reference/annotations -func newAnnotationHandler(sources []string) (*annotationHandler, error) { - data, err := annotation.LoadAndMerge(sources) +// Adds annotations to the JSON schema reading from the OpenAPI spec and +// the annotations.yml override file. OpenAPI descriptions are used as the base, +// and annotations.yml entries override them. +func newAnnotationHandler(annotationsPath string, openapi *openapiParser) (*annotationHandler, error) { + data, err := annotation.LoadAndMerge([]string{annotationsPath}) if err != nil { return nil, err } - d := &annotationHandler{} - d.parsedAnnotations = data - d.missingAnnotations = annotation.File{} - return d, nil + + pathMap := buildPathMapping() + + // Convert bundle path keys in annotations to Go type path keys. 
+ resolved := annotation.File{} + for key, fields := range data { + // If the key is a bundle path, resolve it to a Go type path. + if tp, ok := pathMap.bundlePathToType[key]; ok { + resolved[tp] = fields + } else { + resolved[key] = fields + } + } + + return &annotationHandler{ + parsedAnnotations: resolved, + openapi: openapi, + missingAnnotations: annotation.File{}, + pathMap: pathMap, + }, nil } func (d *annotationHandler) addAnnotations(typ reflect.Type, s jsonschema.Schema) jsonschema.Schema { @@ -47,34 +69,190 @@ func (d *annotationHandler) addAnnotations(typ reflect.Type, s jsonschema.Schema return s } - annotations := d.parsedAnnotations[refPath] - if annotations == nil { - annotations = map[string]annotation.Descriptor{} + // Step 1: Get base annotations from the OpenAPI spec + openapiAnnotations := d.getOpenApiAnnotations(typ, s) + + // Step 2: Get override annotations from annotations.yml + overrideAnnotations := d.parsedAnnotations[refPath] + if overrideAnnotations == nil { + overrideAnnotations = map[string]annotation.Descriptor{} } - rootTypeAnnotation, ok := annotations[RootTypeKey] + // Step 3: Merge. Start with OpenAPI base, apply overrides on top. + merged := map[string]annotation.Descriptor{} + maps.Copy(merged, openapiAnnotations) + for k, v := range overrideAnnotations { + if existing, ok := merged[k]; ok { + merged[k] = mergeDescriptor(existing, v) + } else { + merged[k] = v + } + } + + rootTypeAnnotation, ok := merged[RootTypeKey] if ok { assignAnnotation(&s, rootTypeAnnotation) } for k, v := range s.Properties { - item := annotations[k] + item := merged[k] if item.Description == "" { item.Description = annotation.Placeholder - emptyAnnotations := d.missingAnnotations[refPath] - if emptyAnnotations == nil { - emptyAnnotations = map[string]annotation.Descriptor{} - d.missingAnnotations[refPath] = emptyAnnotations + // Only track missing annotations for CLI types, and only for + // fields that don't have OpenAPI descriptions. 
Fields with + // OpenAPI descriptions are handled at runtime and don't need + // entries in annotations.yml. + if isCliPath(refPath) { + if _, hasOpenApi := openapiAnnotations[k]; !hasOpenApi { + emptyAnnotations := d.missingAnnotations[refPath] + if emptyAnnotations == nil { + emptyAnnotations = map[string]annotation.Descriptor{} + d.missingAnnotations[refPath] = emptyAnnotations + } + emptyAnnotations[k] = annotation.Descriptor{ + Description: annotation.Placeholder, + } + } } - emptyAnnotations[k] = item } assignAnnotation(v, item) } return s } -// Writes missing annotations with placeholder values back to the annotation file +// getOpenApiAnnotations reads annotations for the given type directly from +// the OpenAPI spec. This replaces the previous approach of pre-extracting +// annotations into a separate YAML file. +func (d *annotationHandler) getOpenApiAnnotations(typ reflect.Type, s jsonschema.Schema) map[string]annotation.Descriptor { + result := map[string]annotation.Descriptor{} + + if d.openapi == nil { + return result + } + + // Also check embedded (anonymous) struct types for promoted field descriptions. + // For example, resources.Dashboard embeds dashboards.Dashboard from the SDK, + // and promoted fields should get descriptions from the embedded type's OpenAPI entry. 
+ derefTyp := typ + for derefTyp.Kind() == reflect.Pointer { + derefTyp = derefTyp.Elem() + } + if derefTyp.Kind() == reflect.Struct { + for i := range derefTyp.NumField() { + field := derefTyp.Field(i) + if !field.Anonymous { + continue + } + embeddedResult := d.getOpenApiAnnotations(field.Type, s) + for k, v := range embeddedResult { + if k == RootTypeKey { + continue // Don't inherit root type annotations from embedded types + } + if _, exists := result[k]; !exists { + result[k] = v + } + } + } + } + + ref, ok := d.openapi.findRef(typ) + if !ok { + return result + } + + // Root type annotation + preview := ref.Preview + if preview == "PUBLIC" { + preview = "" + } + outputOnly := isOutputOnly(ref) + if ref.Description != "" || ref.Enum != nil || ref.Deprecated || ref.DeprecationMessage != "" || preview != "" || outputOnly != nil { + if ref.Deprecated && ref.DeprecationMessage == "" { + ref.DeprecationMessage = "This field is deprecated" + } + result[RootTypeKey] = annotation.Descriptor{ + Description: ref.Description, + Enum: ref.Enum, + DeprecationMessage: ref.DeprecationMessage, + Preview: preview, + OutputOnly: outputOnly, + } + } + + // Property annotations + for k := range s.Properties { + if refProp, ok := ref.Properties[k]; ok { + propPreview := refProp.Preview + if propPreview == "PUBLIC" { + propPreview = "" + } + if refProp.Deprecated && refProp.DeprecationMessage == "" { + refProp.DeprecationMessage = "This field is deprecated" + } + + description := refProp.Description + + // If the field doesn't have a description, try to find the referenced type + // and use its description. 
+ if description == "" && refProp.Reference != nil { + refRefPath := *refProp.Reference + refTypeName := strings.TrimPrefix(refRefPath, "#/components/schemas/") + if refType, ok := d.openapi.ref[refTypeName]; ok { + description = refType.Description + } + } + + result[k] = annotation.Descriptor{ + Description: description, + Enum: refProp.Enum, + Preview: propPreview, + DeprecationMessage: refProp.DeprecationMessage, + OutputOnly: isOutputOnly(*refProp), + } + } + } + + return result +} + +// mergeDescriptor merges an override descriptor on top of a base descriptor. +// Non-empty, non-PLACEHOLDER fields in the override take precedence. +func mergeDescriptor(base, override annotation.Descriptor) annotation.Descriptor { + result := base + if override.Description != "" && override.Description != annotation.Placeholder { + result.Description = override.Description + } + if override.MarkdownDescription != "" { + result.MarkdownDescription = override.MarkdownDescription + } + if override.Title != "" { + result.Title = override.Title + } + if override.Default != nil { + result.Default = override.Default + } + if override.Enum != nil { + result.Enum = override.Enum + } + if override.MarkdownExamples != "" { + result.MarkdownExamples = override.MarkdownExamples + } + if override.DeprecationMessage != "" { + result.DeprecationMessage = override.DeprecationMessage + } + if override.Preview != "" { + result.Preview = override.Preview + } + if override.OutputOnly != nil { + result.OutputOnly = override.OutputOnly + } + return result +} + +// Writes missing annotations with placeholder values back to the annotation file. +// Missing annotations are stored with Go type path keys internally, so we +// convert them to bundle path keys before writing. 
func (d *annotationHandler) syncWithMissingAnnotations(outputPath string) error { existingFile, err := os.ReadFile(outputPath) if err != nil { @@ -85,14 +263,21 @@ func (d *annotationHandler) syncWithMissingAnnotations(outputPath string) error return err } - for k := range d.missingAnnotations { + // Convert missing annotations from Go type paths to bundle paths. + converted := annotation.File{} + for k, v := range d.missingAnnotations { if !isCliPath(k) { - delete(d.missingAnnotations, k) fmt.Printf("Missing annotations for `%s` that are not in CLI package, try to fetch latest OpenAPI spec and regenerate annotations\n", k) + continue + } + if bundlePath, ok := d.pathMap.typeToBundlePath[k]; ok { + converted[bundlePath] = v + } else { + converted[k] = v } } - missingAnnotations, err := convert.FromTyped(d.missingAnnotations, dyn.NilValue) + missingAnnotations, err := convert.FromTyped(converted, dyn.NilValue) if err != nil { return err } @@ -108,11 +293,7 @@ func (d *annotationHandler) syncWithMissingAnnotations(outputPath string) error return err } - err = saveYamlWithStyle(outputPath, outputTyped) - if err != nil { - return err - } - return nil + return saveYamlWithStyle(outputPath, outputTyped) } func getPath(typ reflect.Type) string { diff --git a/bundle/internal/schema/annotations.yml b/bundle/internal/schema/annotations.yml index e5b3ccd0dd..75383056f7 100644 --- a/bundle/internal/schema/annotations.yml +++ b/bundle/internal/schema/annotations.yml @@ -1,4 +1,4 @@ -github.com/databricks/cli/bundle/config.Artifact: +artifacts.*: "build": "description": |- An optional set of build commands to run locally before deployment. @@ -19,11 +19,11 @@ github.com/databricks/cli/bundle/config.Artifact: Required if the artifact is a Python wheel. The type of the artifact. Valid values are `whl` and `jar`. "markdown_description": |- Required if the artifact is a Python wheel. The type of the artifact. Valid values are `whl` and `jar`. 
-github.com/databricks/cli/bundle/config.ArtifactFile: +artifacts.*.files.*: "source": "description": |- Required. The artifact source file. -github.com/databricks/cli/bundle/config.Bundle: +bundle: "cluster_id": "description": |- The ID of a cluster to use to run the bundle. @@ -53,14 +53,32 @@ github.com/databricks/cli/bundle/config.Bundle: "uuid": "description": |- Reserved. A Universally Unique Identifier (UUID) for the bundle that uniquely identifies the bundle in internal Databricks systems. This is generated when a bundle project is initialized using a Databricks template (using the `databricks bundle init` command). -github.com/databricks/cli/bundle/config.Deployment: +bundle.deployment: "fail_on_active_runs": "description": |- Whether to fail on active runs. If this is set to true a deployment that is running can be interrupted. "lock": "description": |- The deployment lock attributes. -github.com/databricks/cli/bundle/config.Experimental: +bundle.deployment.lock: + "enabled": + "description": |- + Whether this lock is enabled. + "force": + "description": |- + Whether to force this lock if it is enabled. +bundle.git: + "branch": + "description": |- + The Git branch name. + "markdown_description": |- + The Git branch name. See [\_](/dev-tools/bundles/settings.md#git). + "origin_url": + "description": |- + The origin URL of the repository. + "markdown_description": |- + The origin URL of the repository. See [\_](/dev-tools/bundles/settings.md#git). +experimental: "pydabs": "description": |- The PyDABs configuration. @@ -85,25 +103,103 @@ github.com/databricks/cli/bundle/config.Experimental: "use_legacy_run_as": "description": |- Whether to use the legacy run_as behavior. -github.com/databricks/cli/bundle/config.Git: - "branch": +experimental.pydabs: + "enabled": "description": |- - The Git branch name. - "markdown_description": |- - The Git branch name. See [\_](/dev-tools/bundles/settings.md#git). 
- "origin_url": + Whether or not PyDABs (Private Preview) is enabled + "import": "description": |- - The origin URL of the repository. + The PyDABs project to import to discover resources, resource generator and mutators + "venv_path": + "description": |- + The Python virtual environment path +github.com/databricks/databricks-sdk-go/service/catalog.MonitorInferenceLog: + "granularities": + "description": |- + Granularities for aggregating data into time windows based on their timestamp. Valid values are 5 minutes, 30 minutes, 1 hour, 1 day, n weeks, 1 month, or 1 year. +github.com/databricks/databricks-sdk-go/service/catalog.MonitorTimeSeries: + "granularities": + "description": |- + Granularities for aggregating data into time windows based on their timestamp. Valid values are 5 minutes, 30 minutes, 1 hour, 1 day, n weeks, 1 month, or 1 year. +github.com/databricks/databricks-sdk-go/service/catalog.SseEncryptionDetailsAlgorithm: + "_": + "description": |- + SSE algorithm to use for encrypting S3 objects + "enum": + - |- + AWS_SSE_KMS + - |- + AWS_SSE_S3 +github.com/databricks/databricks-sdk-go/service/compute.Environment: + "base_environment": + "description": |- + The `base_environment` key refers to an `env.yaml` file that specifies an environment version and a collection of dependencies required for the environment setup. + This `env.yaml` file may itself include a `base_environment` reference pointing to another `env_1.yaml` file. However, when used as a base environment, `env_1.yaml` (or further nested references) will not be processed or included in the final environment, meaning that the resolution of `base_environment` references is not recursive. + "dependencies": + "description": |- + List of pip dependencies, as supported by the version of pip in this environment. 
+github.com/databricks/databricks-sdk-go/service/compute.InitScriptInfo: + "abfss": + "description": |- + Contains the Azure Data Lake Storage destination path +github.com/databricks/databricks-sdk-go/service/compute.Kind: + "_": + "enum": + - |- + CLASSIC_PREVIEW +github.com/databricks/databricks-sdk-go/service/compute.LogAnalyticsInfo: + "log_analytics_primary_key": + "description": |- + The primary key for the Azure Log Analytics agent configuration + "log_analytics_workspace_id": + "description": |- + The workspace ID for the Azure Log Analytics agent configuration +github.com/databricks/databricks-sdk-go/service/jobs.DashboardTask: + "filters": + "description": |- + Dashboard task parameters. Used to apply dashboard filter values during dashboard task execution. Parameter values get applied to any dashboard filters that have a matching URL identifier as the parameter key. + The parameter value format is dependent on the filter type: + - For text and single-select filters, provide a single value (e.g. `"value"`) + - For date and datetime filters, provide the value in ISO 8601 format (e.g. `"2000-01-01T00:00:00"`) + - For multi-select filters, provide a JSON array of values (e.g. `"[\"value1\",\"value2\"]"`) + - For range and date range filters, provide a JSON object with `start` and `end` (e.g. `"{\"start\":\"1\",\"end\":\"10\"}"`) +github.com/databricks/databricks-sdk-go/service/jobs.SparseCheckout: + "patterns": + "description": |- + List of patterns to include for sparse checkout. +github.com/databricks/databricks-sdk-go/service/pipelines.PipelineLibrary: + "whl": + "deprecation_message": |- + This field is deprecated +github.com/databricks/databricks-sdk-go/service/postgres.EndpointGroupSpec: + "max": + "description": |- + The maximum number of computes in the endpoint group. Currently, this must be equal to min. Set to 1 for single + compute endpoints, to disable HA. 
To manually suspend all computes in an endpoint group, set disabled to + true on the EndpointSpec. + "min": + "description": |- + The minimum number of computes in the endpoint group. Currently, this must be equal to max. This must be greater + than or equal to 1. +permissions.*: + "-": + "description": |- + Defines a permission for a specific entity. "markdown_description": |- - The origin URL of the repository. See [\_](/dev-tools/bundles/settings.md#git). -github.com/databricks/cli/bundle/config.Lock: - "enabled": + Defines a permission for a specific entity. See [\_](/dev-tools/bundles/settings.md#permissions) and [\_](/dev-tools/bundles/permissions.md). + "group_name": "description": |- - Whether this lock is enabled. - "force": + The name of the group that has the permission set in level. + "level": "description": |- - Whether to force this lock if it is enabled. -github.com/databricks/cli/bundle/config.Presets: + The allowed permission for user, group, service principal defined for this permission. + "service_principal_name": + "description": |- + The name of the service principal that has the permission set in level. + "user_name": + "description": |- + The name of the user that has the permission set in level. +presets: "artifacts_dynamic_version": "description": |- Whether to enable dynamic_version on all artifacts. @@ -125,17 +221,7 @@ github.com/databricks/cli/bundle/config.Presets: "trigger_pause_status": "description": |- A pause status to apply to all job triggers and schedules. Valid values are PAUSED or UNPAUSED. 
-github.com/databricks/cli/bundle/config.PyDABs: - "enabled": - "description": |- - Whether or not PyDABs (Private Preview) is enabled - "import": - "description": |- - The PyDABs project to import to discover resources, resource generator and mutators - "venv_path": - "description": |- - The Python virtual environment path -github.com/databricks/cli/bundle/config.Python: +python: "mutators": "description": |- Mutators contains a list of fully qualified function paths to mutator functions. @@ -153,7 +239,7 @@ github.com/databricks/cli/bundle/config.Python: If enabled, Python code will execute within this environment. If disabled, it defaults to using the Python interpreter available in the current shell. -github.com/databricks/cli/bundle/config.Resources: +resources: "alerts": "description": |- PLACEHOLDER @@ -251,265 +337,179 @@ github.com/databricks/cli/bundle/config.Resources: The volume definitions for the bundle, where each key is the name of the volume. "markdown_description": |- The volume definitions for the bundle, where each key is the name of the volume. See [\_](/dev-tools/bundles/resources.md#volumes). -github.com/databricks/cli/bundle/config.Root: - "artifacts": - "description": |- - Defines the attributes to build an artifact - "markdown_description": |- - Defines the attributes to build artifacts, where each key is the name of the artifact, and the value is a Map that defines the artifact build settings. For information about the `artifacts` mapping, see [\_](/dev-tools/bundles/settings.md#artifacts). - - Artifact settings defined in the top level of the bundle configuration can be overridden in the `targets` mapping. See [\_](/dev-tools/bundles/artifact-overrides.md). - "markdown_examples": |- - ```yaml - artifacts: - default: - type: whl - build: poetry build - path: . - ``` - "bundle": - "description": |- - The bundle attributes when deploying to this target. 
- "markdown_description": |- - The bundle attributes when deploying to this target, - "environments": +resources.alerts.*: + "create_time": "description": |- PLACEHOLDER - "deprecation_message": |- - Deprecated: please use targets instead - "experimental": + "custom_description": "description": |- - Defines attributes for experimental features. - "include": + PLACEHOLDER + "custom_summary": "description": |- - Specifies a list of path globs that contain configuration files to include within the bundle. - "markdown_description": |- - Specifies a list of path globs that contain configuration files to include within the bundle. See [\_](/dev-tools/bundles/settings.md#include). - "permissions": + PLACEHOLDER + "display_name": "description": |- - Defines a permission for a specific entity. - "markdown_description": |- - A Sequence that defines the permissions to apply to experiments, jobs, pipelines, and models defined in the bundle, where each item in the sequence is a permission for a specific entity. - - See [\_](/dev-tools/bundles/settings.md#permissions) and [\_](/dev-tools/bundles/permissions.md). - "markdown_examples": |- - ```yaml - permissions: - - level: CAN_VIEW - group_name: test-group - - level: CAN_MANAGE - user_name: someone@example.com - - level: CAN_RUN - service_principal_name: 123456-abcdef - ``` - "presets": + PLACEHOLDER + "effective_run_as": "description": |- - Defines bundle deployment presets. - "markdown_description": |- - Defines bundle deployment presets. See [\_](/dev-tools/bundles/deployment-modes.md#presets). - "python": + PLACEHOLDER + "file_path": "description": |- PLACEHOLDER - "resources": + "id": "description": |- - A Map that defines the resources for the bundle, where each key is the name of the resource, and the value is a Map that defines the resource. - "markdown_description": |- - A Map that defines the resources for the bundle, where each key is the name of the resource, and the value is a Map that defines the resource. 
For more information about Databricks Asset Bundles supported resources, and resource definition reference, see [\_](/dev-tools/bundles/resources.md). - - ```yaml - resources: - : - : - : - ``` - "run_as": + PLACEHOLDER + "lifecycle": "description": |- - The identity to use when running Databricks Asset Bundles workflows. - "markdown_description": |- - The identity to use when running Databricks Asset Bundles workflows. See [\_](/dev-tools/bundles/run-as.md). - "scripts": + Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. + "lifecycle_state": "description": |- PLACEHOLDER - "sync": + "owner_user_name": "description": |- - The files and file paths to include or exclude in the bundle. - "markdown_description": |- - The files and file paths to include or exclude in the bundle. See [\_](/dev-tools/bundles/settings.md#sync). - "targets": + PLACEHOLDER + "parent_path": "description": |- - Defines deployment targets for the bundle. - "markdown_description": |- - Defines deployment targets for the bundle. See [\_](/dev-tools/bundles/settings.md#targets) - "variables": + PLACEHOLDER + "permissions": "description": |- - A Map that defines the custom variables for the bundle, where each key is the name of the variable, and the value is a Map that defines the variable. - "workspace": + PLACEHOLDER + "query_text": "description": |- - Defines the Databricks workspace for the bundle. - "markdown_description": |- - Defines the Databricks workspace for the bundle. See [\_](/dev-tools/bundles/settings.md#workspace). -github.com/databricks/cli/bundle/config.Script: - "content": + PLACEHOLDER + "run_as": "description": |- PLACEHOLDER -github.com/databricks/cli/bundle/config.Sync: - "exclude": + "run_as_user_name": "description": |- - A list of files or folders to exclude from the bundle. 
- "include": + PLACEHOLDER + "update_time": "description": |- - A list of files or folders to include in the bundle. - "paths": + PLACEHOLDER + "warehouse_id": "description": |- - The local folder paths, which can be outside the bundle root, to synchronize to the workspace when the bundle is deployed. -github.com/databricks/cli/bundle/config.Target: - "artifacts": + PLACEHOLDER +resources.alerts.*.permissions.*: + "group_name": "description": |- - The artifacts to include in the target deployment. - "bundle": + PLACEHOLDER + "level": "description": |- - The bundle attributes when deploying to this target. - "cluster_id": + PLACEHOLDER + "service_principal_name": "description": |- - The ID of the cluster to use for this target. - "compute_id": + PLACEHOLDER + "user_name": "description": |- - Deprecated. The ID of the compute to use for this target. - "deprecation_message": |- - Deprecated: please use cluster_id instead - "default": + PLACEHOLDER +resources.alerts.*.permissions.*.level: + "_": + "enum": + - |- + CAN_EDIT + - |- + CAN_MANAGE + - |- + CAN_READ + - |- + CAN_RUN +resources.apps.*: + "config": "description": |- - Whether this target is the default target. - "git": + PLACEHOLDER + "git_source": "description": |- - The Git version control settings for the target. - "mode": + Git source configuration for app deployments. Specifies which git reference (branch, tag, or commit) + to use when deploying the app. Used in conjunction with git_repository to deploy code directly from git. + The source_code_path within git_source specifies the relative path to the app code within the repository. + "lifecycle": "description": |- - The deployment mode for the target. - "markdown_description": |- - The deployment mode for the target. Valid values are `development` or `production`. See [\_](/dev-tools/bundles/deployment-modes.md). + Lifecycle is a struct that contains the lifecycle settings for a resource. 
It controls the behavior of the resource when it is deployed or destroyed. "permissions": "description": |- - The permissions for deploying and running the bundle in the target. - "presets": + PLACEHOLDER + "source_code_path": "description": |- - The deployment presets for the target. - "resources": + PLACEHOLDER +resources.apps.*.config: + "command": "description": |- - The resource definitions for the target. - "run_as": + PLACEHOLDER + "env": "description": |- - The identity to use to run the bundle. - "markdown_description": |- - The identity to use to run the bundle, see [\_](/dev-tools/bundles/run-as.md). - "sync": + PLACEHOLDER +resources.apps.*.config.env.*: + "name": "description": |- - The local paths to sync to the target workspace when a bundle is run or deployed. - "variables": + PLACEHOLDER + "value": "description": |- - The custom variable definitions for the target. - "workspace": + PLACEHOLDER + "value_from": "description": |- - The Databricks workspace for the target. -github.com/databricks/cli/bundle/config.Workspace: - "artifact_path": + PLACEHOLDER +resources.apps.*.permissions.*: + "group_name": "description": |- - The artifact path to use within the workspace for both deployments and workflow runs - "auth_type": + PLACEHOLDER + "level": "description": |- - The authentication type. 
- "azure_client_id": + PLACEHOLDER + "service_principal_name": "description": |- - The Azure client ID - "azure_environment": - "description": |- - The Azure environment - "azure_login_app_id": - "description": |- - The Azure login app ID - "azure_tenant_id": - "description": |- - The Azure tenant ID - "azure_use_msi": - "description": |- - Whether to use MSI for Azure - "azure_workspace_resource_id": - "description": |- - The Azure workspace resource ID - "client_id": - "description": |- - The client ID for the workspace - "experimental_is_unified_host": - "description": |- - Experimental feature flag to indicate if the host is a unified host - "file_path": - "description": |- - The file path to use within the workspace for both deployments and workflow runs - "google_service_account": - "description": |- - The Google service account name - "host": - "description": |- - The Databricks workspace host URL - "profile": - "description": |- - The Databricks workspace profile name - "resource_path": - "description": |- - The workspace resource path - "root_path": - "description": |- - The Databricks workspace root path - "state_path": - "description": |- - The workspace state path - "workspace_id": - "description": |- - The Databricks workspace ID -github.com/databricks/cli/bundle/config/resources.Alert: - "create_time": + PLACEHOLDER + "user_name": "description": |- PLACEHOLDER - "custom_description": - "description": |- - PLACEHOLDER - "custom_summary": +resources.apps.*.permissions.*.level: + "_": + "enum": + - |- + CAN_MANAGE + - |- + CAN_USE +resources.catalogs.*: + "comment": "description": |- PLACEHOLDER - "display_name": + "connection_name": "description": |- PLACEHOLDER - "effective_run_as": + "grants": "description": |- PLACEHOLDER - "id": + "lifecycle": "description": |- - PLACEHOLDER - "lifecycle_state": + Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. 
+ "name": "description": |- PLACEHOLDER - "owner_user_name": + "options": "description": |- PLACEHOLDER - "parent_path": + "properties": "description": |- PLACEHOLDER - "query_text": + "provider_name": "description": |- PLACEHOLDER - "run_as": + "share_name": "description": |- PLACEHOLDER - "run_as_user_name": + "storage_root": "description": |- PLACEHOLDER - "update_time": +resources.clusters.*: + "_": + "markdown_description": |- + The cluster resource defines an [all-purpose cluster](/api/workspace/clusters/create). + "lifecycle": "description": |- - PLACEHOLDER - "warehouse_id": + Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. + "permissions": "description": |- PLACEHOLDER -github.com/databricks/cli/bundle/config/resources.AlertPermission: +resources.clusters.*.permissions.*: "group_name": "description": |- PLACEHOLDER @@ -522,68 +522,80 @@ github.com/databricks/cli/bundle/config/resources.AlertPermission: "user_name": "description": |- PLACEHOLDER -github.com/databricks/cli/bundle/config/resources.App: - "git_source": - "description": |- - Git source configuration for app deployments. Specifies which git reference (branch, tag, or commit) - to use when deploying the app. Used in conjunction with git_repository to deploy code directly from git. - The source_code_path within git_source specifies the relative path to the app code within the repository. -github.com/databricks/cli/bundle/config/resources.AppConfig: - "command": - "description": |- - PLACEHOLDER - "env": +resources.clusters.*.permissions.*.level: + "_": + "enum": + - |- + CAN_MANAGE + - |- + CAN_RESTART + - |- + CAN_ATTACH_TO +resources.dashboards.*: + "_": + "markdown_description": |- + The dashboard resource allows you to manage [AI/BI dashboards](/api/workspace/lakeview/create) in a bundle. For information about AI/BI dashboards, see [_](/dashboards/index.md). 
+ "create_time": "description": |- - PLACEHOLDER -github.com/databricks/cli/bundle/config/resources.AppEnvVar: - "name": + The timestamp of when the dashboard was created. + "dashboard_id": "description": |- - PLACEHOLDER - "value": + UUID identifying the dashboard. + "dataset_catalog": "description": |- - PLACEHOLDER - "value_from": + Sets the default catalog for all datasets in this dashboard. When set, this overrides the catalog specified in individual dataset definitions. + "dataset_schema": "description": |- - PLACEHOLDER -github.com/databricks/cli/bundle/config/resources.AppPermission: - "group_name": + Sets the default schema for all datasets in this dashboard. When set, this overrides the schema specified in individual dataset definitions. + "display_name": "description": |- - PLACEHOLDER - "level": + The display name of the dashboard. + "embed_credentials": "description": |- PLACEHOLDER - "service_principal_name": + "etag": "description": |- - PLACEHOLDER - "user_name": + The etag for the dashboard. Can be optionally provided on updates to ensure that the dashboard + has not been modified since the last read. + This field is excluded in List Dashboards responses. + "file_path": "description": |- PLACEHOLDER -github.com/databricks/cli/bundle/config/resources.Catalog: - "comment": + "lifecycle": "description": |- - PLACEHOLDER - "connection_name": + Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. + "lifecycle_state": "description": |- - PLACEHOLDER - "name": + The state of the dashboard resource. Used for tracking trashed status. + "parent_path": "description": |- - PLACEHOLDER - "options": + The workspace path of the folder containing the dashboard. Includes leading slash and no + trailing slash. + This field is excluded in List Dashboards responses. 
+ "path": "description": |- - PLACEHOLDER - "properties": + The workspace path of the dashboard asset, including the file name. + Exported dashboards always have the file extension `.lvdash.json`. + This field is excluded in List Dashboards responses. + "permissions": "description": |- PLACEHOLDER - "provider_name": + "serialized_dashboard": "description": |- - PLACEHOLDER - "share_name": + The contents of the dashboard in serialized string form. + This field is excluded in List Dashboards responses. + Use the [get dashboard API](https://docs.databricks.com/api/workspace/lakeview/get) + to retrieve an example response, which includes the `serialized_dashboard` field. + This field provides the structure of the JSON string that represents the dashboard's + layout and components. + "update_time": "description": |- - PLACEHOLDER - "storage_root": + The timestamp of when the dashboard was last updated by the user. + This field is excluded in List Dashboards responses. + "warehouse_id": "description": |- - PLACEHOLDER -github.com/databricks/cli/bundle/config/resources.ClusterPermission: + The warehouse ID used to run the dashboard. +resources.dashboards.*.permissions.*: "group_name": "description": |- PLACEHOLDER @@ -596,31 +608,32 @@ github.com/databricks/cli/bundle/config/resources.ClusterPermission: "user_name": "description": |- PLACEHOLDER -github.com/databricks/cli/bundle/config/resources.Dashboard: - "dataset_catalog": - "description": |- - Sets the default catalog for all datasets in this dashboard. When set, this overrides the catalog specified in individual dataset definitions. - "dataset_schema": - "description": |- - Sets the default schema for all datasets in this dashboard. When set, this overrides the schema specified in individual dataset definitions. 
-github.com/databricks/cli/bundle/config/resources.DashboardPermission: - "group_name": - "description": |- - PLACEHOLDER - "level": +resources.dashboards.*.permissions.*.level: + "_": + "enum": + - |- + CAN_READ + - |- + CAN_RUN + - |- + CAN_EDIT + - |- + CAN_MANAGE +resources.database_catalogs.*: + "lifecycle": "description": |- - PLACEHOLDER - "service_principal_name": + Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. +resources.database_instances.*: + "effective_capacity": "description": |- PLACEHOLDER - "user_name": + "lifecycle": "description": |- - PLACEHOLDER -github.com/databricks/cli/bundle/config/resources.DatabaseInstance: - "effective_capacity": + Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. + "permissions": "description": |- PLACEHOLDER -github.com/databricks/cli/bundle/config/resources.DatabaseInstancePermission: +resources.database_instances.*.permissions.*: "group_name": "description": |- PLACEHOLDER @@ -633,7 +646,26 @@ github.com/databricks/cli/bundle/config/resources.DatabaseInstancePermission: "user_name": "description": |- PLACEHOLDER -github.com/databricks/cli/bundle/config/resources.DatabaseProjectPermission: +resources.database_instances.*.permissions.*.level: + "_": + "enum": + - |- + CAN_CREATE + - |- + CAN_USE + - |- + CAN_MANAGE +resources.experiments.*: + "_": + "markdown_description": |- + The experiment resource allows you to define [MLflow experiments](/api/workspace/experiments/createexperiment) in a bundle. For information about MLflow experiments, see [_](/mlflow/experiments.md). + "lifecycle": + "description": |- + Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. 
+ "permissions": + "description": |- + PLACEHOLDER +resources.experiments.*.permissions.*: "group_name": "description": |- PLACEHOLDER @@ -646,13 +678,25 @@ github.com/databricks/cli/bundle/config/resources.DatabaseProjectPermission: "user_name": "description": |- PLACEHOLDER -github.com/databricks/cli/bundle/config/resources.ExternalLocation: +resources.experiments.*.permissions.*.level: + "_": + "enum": + - |- + CAN_MANAGE + - |- + CAN_EDIT + - |- + CAN_READ +resources.external_locations.*: "comment": "description": |- PLACEHOLDER "credential_name": "description": |- PLACEHOLDER + "effective_enable_file_events": + "description": |- + PLACEHOLDER "enable_file_events": "description": |- PLACEHOLDER @@ -670,7 +714,7 @@ github.com/databricks/cli/bundle/config/resources.ExternalLocation: PLACEHOLDER "lifecycle": "description": |- - PLACEHOLDER + Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. "name": "description": |- PLACEHOLDER @@ -683,24 +727,21 @@ github.com/databricks/cli/bundle/config/resources.ExternalLocation: "url": "description": |- PLACEHOLDER -github.com/databricks/cli/bundle/config/resources.JobPermission: - "group_name": - "description": |- - PLACEHOLDER - "level": - "description": |- - PLACEHOLDER - "service_principal_name": +resources.jobs.*: + "_": + "markdown_description": |- + The job resource allows you to define [jobs and their corresponding tasks](/api/workspace/jobs/create) in your bundle. For information about jobs, see [_](/jobs/index.md). For a tutorial that uses a Databricks Asset Bundles template to create a job, see [_](/dev-tools/bundles/jobs-tutorial.md). + "lifecycle": "description": |- - PLACEHOLDER - "user_name": + Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. 
+ "permissions": "description": |- PLACEHOLDER -github.com/databricks/cli/bundle/config/resources.Lifecycle: +resources.jobs.*.lifecycle: "prevent_destroy": "description": |- Lifecycle setting to prevent the resource from being destroyed. -github.com/databricks/cli/bundle/config/resources.MlflowExperimentPermission: +resources.jobs.*.permissions.*: "group_name": "description": |- PLACEHOLDER @@ -713,7 +754,28 @@ github.com/databricks/cli/bundle/config/resources.MlflowExperimentPermission: "user_name": "description": |- PLACEHOLDER -github.com/databricks/cli/bundle/config/resources.MlflowModelPermission: +resources.jobs.*.permissions.*.level: + "_": + "enum": + - |- + CAN_MANAGE + - |- + CAN_MANAGE_RUN + - |- + CAN_VIEW + - |- + IS_OWNER +resources.model_serving_endpoints.*: + "_": + "markdown_description": |- + The model_serving_endpoint resource allows you to define [model serving endpoints](/api/workspace/servingendpoints/create). See [_](/machine-learning/model-serving/manage-serving-endpoints.md). + "lifecycle": + "description": |- + Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. + "permissions": + "description": |- + PLACEHOLDER +resources.model_serving_endpoints.*.permissions.*: "group_name": "description": |- PLACEHOLDER @@ -726,7 +788,26 @@ github.com/databricks/cli/bundle/config/resources.MlflowModelPermission: "user_name": "description": |- PLACEHOLDER -github.com/databricks/cli/bundle/config/resources.ModelServingEndpointPermission: +resources.model_serving_endpoints.*.permissions.*.level: + "_": + "enum": + - |- + CAN_MANAGE + - |- + CAN_QUERY + - |- + CAN_VIEW +resources.models.*: + "_": + "markdown_description": |- + The model resource allows you to define [legacy models](/api/workspace/modelregistry/createmodel) in bundles. Databricks recommends you use Unity Catalog [registered models](#registered-model) instead. 
+ "lifecycle": + "description": |- + Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. + "permissions": + "description": |- + PLACEHOLDER +resources.models.*.permissions.*: "group_name": "description": |- PLACEHOLDER @@ -739,25 +820,30 @@ github.com/databricks/cli/bundle/config/resources.ModelServingEndpointPermission "user_name": "description": |- PLACEHOLDER -github.com/databricks/cli/bundle/config/resources.Permission: - "-": - "description": |- - Defines a permission for a specific entity. +resources.models.*.permissions.*.level: + "_": + "enum": + - |- + CAN_EDIT + - |- + CAN_MANAGE + - |- + CAN_MANAGE_STAGING_VERSIONS + - |- + CAN_MANAGE_PRODUCTION_VERSIONS + - |- + CAN_READ +resources.pipelines.*: + "_": "markdown_description": |- - Defines a permission for a specific entity. See [\_](/dev-tools/bundles/settings.md#permissions) and [\_](/dev-tools/bundles/permissions.md). - "group_name": - "description": |- - The name of the group that has the permission set in level. - "level": - "description": |- - The allowed permission for user, group, service principal defined for this permission. - "service_principal_name": + The pipeline resource allows you to create Delta Live Tables [pipelines](/api/workspace/pipelines/create). For information about pipelines, see [_](/dlt/index.md). For a tutorial that uses the Databricks Asset Bundles template to create a pipeline, see [_](/dev-tools/bundles/pipelines-tutorial.md). + "lifecycle": "description": |- - The name of the service principal that has the permission set in level. - "user_name": + Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. + "permissions": "description": |- - The name of the user that has the permission set in level. 
-github.com/databricks/cli/bundle/config/resources.PipelinePermission: + PLACEHOLDER +resources.pipelines.*.permissions.*: "group_name": "description": |- PLACEHOLDER @@ -770,7 +856,18 @@ github.com/databricks/cli/bundle/config/resources.PipelinePermission: "user_name": "description": |- PLACEHOLDER -github.com/databricks/cli/bundle/config/resources.PostgresBranch: +resources.pipelines.*.permissions.*.level: + "_": + "enum": + - |- + CAN_MANAGE + - |- + IS_OWNER + - |- + CAN_RUN + - |- + CAN_VIEW +resources.postgres_branches.*: "branch_id": "description": |- PLACEHOLDER @@ -785,7 +882,7 @@ github.com/databricks/cli/bundle/config/resources.PostgresBranch: PLACEHOLDER "lifecycle": "description": |- - PLACEHOLDER + Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. "name": "description": |- PLACEHOLDER @@ -819,7 +916,7 @@ github.com/databricks/cli/bundle/config/resources.PostgresBranch: "update_time": "description": |- PLACEHOLDER -github.com/databricks/cli/bundle/config/resources.PostgresEndpoint: +resources.postgres_endpoints.*: "autoscaling_limit_max_cu": "description": |- PLACEHOLDER @@ -843,7 +940,7 @@ github.com/databricks/cli/bundle/config/resources.PostgresEndpoint: PLACEHOLDER "lifecycle": "description": |- - PLACEHOLDER + Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. 
"name": "description": |- PLACEHOLDER @@ -871,7 +968,7 @@ github.com/databricks/cli/bundle/config/resources.PostgresEndpoint: "update_time": "description": |- PLACEHOLDER -github.com/databricks/cli/bundle/config/resources.PostgresProject: +resources.postgres_projects.*: "budget_policy_id": "description": |- PLACEHOLDER @@ -892,7 +989,7 @@ github.com/databricks/cli/bundle/config/resources.PostgresProject: PLACEHOLDER "lifecycle": "description": |- - PLACEHOLDER + Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. "name": "description": |- PLACEHOLDER @@ -917,36 +1014,7 @@ github.com/databricks/cli/bundle/config/resources.PostgresProject: "update_time": "description": |- PLACEHOLDER -github.com/databricks/cli/bundle/config/resources.SecretScope: - "backend_type": - "description": |- - The backend type the scope will be created with. If not specified, will default to `DATABRICKS` - "keyvault_metadata": - "description": |- - The metadata for the secret scope if the `backend_type` is `AZURE_KEYVAULT` - "lifecycle": - "description": |- - Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. - "name": - "description": |- - Scope name requested by the user. Scope names are unique. - "permissions": - "description": |- - The permissions to apply to the secret scope. Permissions are managed via secret scope ACLs. -github.com/databricks/cli/bundle/config/resources.SecretScopePermission: - "group_name": - "description": |- - The name of the group that has the permission set in level. This field translates to a `principal` field in secret scope ACL. - "level": - "description": |- - The allowed permission for user, group, service principal defined for this permission. - "service_principal_name": - "description": |- - The application ID of an active service principal. 
This field translates to a `principal` field in secret scope ACL. - "user_name": - "description": |- - The name of the user that has the permission set in level. This field translates to a `principal` field in secret scope ACL. -github.com/databricks/cli/bundle/config/resources.SqlWarehousePermission: +resources.postgres_projects.*.permissions.*: "group_name": "description": |- PLACEHOLDER @@ -959,8 +1027,140 @@ github.com/databricks/cli/bundle/config/resources.SqlWarehousePermission: "user_name": "description": |- PLACEHOLDER -github.com/databricks/cli/bundle/config/resources.SyncedDatabaseTable: - "data_synchronization_status": +resources.postgres_projects.*.permissions.*.level: + "_": + "enum": + - |- + CAN_USE + - |- + CAN_MANAGE +resources.quality_monitors.*: + "_": + "markdown_description": |- + The quality_monitor resource allows you to define a Unity Catalog [table monitor](/api/workspace/qualitymonitors/create). For information about monitors, see [_](/machine-learning/model-serving/monitor-diagnose-endpoints.md). + "lifecycle": + "description": |- + Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. + "table_name": + "description": |- + PLACEHOLDER +resources.registered_models.*: + "_": + "markdown_description": |- + The registered model resource allows you to define models in Unity Catalog. For information about Unity Catalog [registered models](/api/workspace/registeredmodels/create), see [_](/machine-learning/manage-model-lifecycle/index.md). + "grants": + "description": |- + PLACEHOLDER + "lifecycle": + "description": |- + Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. 
+resources.schemas.*: + "_": + "markdown_description": |- + The schema resource type allows you to define Unity Catalog [schemas](/api/workspace/schemas/create) for tables and other assets in your workflows and pipelines created as part of a bundle. A schema, different from other resource types, has the following limitations: + + - The owner of a schema resource is always the deployment user, and cannot be changed. If `run_as` is specified in the bundle, it will be ignored by operations on the schema. + - Only fields supported by the corresponding [Schemas object create API](/api/workspace/schemas/create) are available for the schema resource. For example, `enable_predictive_optimization` is not supported as it is only available on the [update API](/api/workspace/schemas/update). + "grants": + "description": |- + PLACEHOLDER + "lifecycle": + "description": |- + Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. +resources.secret_scopes.*: + "backend_type": + "description": |- + The backend type the scope will be created with. If not specified, will default to `DATABRICKS` + "keyvault_metadata": + "description": |- + The metadata for the secret scope if the `backend_type` is `AZURE_KEYVAULT` + "lifecycle": + "description": |- + Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. + "name": + "description": |- + Scope name requested by the user. Scope names are unique. + "permissions": + "description": |- + The permissions to apply to the secret scope. Permissions are managed via secret scope ACLs. +resources.secret_scopes.*.permissions.*: + "group_name": + "description": |- + The name of the group that has the permission set in level. This field translates to a `principal` field in secret scope ACL. 
+ "level": + "description": |- + The allowed permission for user, group, service principal defined for this permission. + "service_principal_name": + "description": |- + The application ID of an active service principal. This field translates to a `principal` field in secret scope ACL. + "user_name": + "description": |- + The name of the user that has the permission set in level. This field translates to a `principal` field in secret scope ACL. +resources.secret_scopes.*.permissions.*.level: + "_": + "enum": + - |- + READ + - |- + WRITE + - |- + MANAGE +resources.sql_warehouses.*: + "cluster_size": + "description": |- + Size of the clusters allocated for this warehouse. + Increasing the size of a spark cluster allows you to run larger queries on + it. If you want to increase the number of concurrent queries, please tune + max_num_clusters. + + Supported values: + - 2X-Small + - X-Small + - Small + - Medium + - Large + - X-Large + - 2X-Large + - 3X-Large + - 4X-Large + - 5X-Large + "enable_photon": + "description": |- + Configures whether the warehouse should use Photon optimized clusters. + + Defaults to true. + "lifecycle": + "description": |- + Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. 
+ "permissions": + "description": |- + PLACEHOLDER +resources.sql_warehouses.*.permissions.*: + "group_name": + "description": |- + PLACEHOLDER + "level": + "description": |- + PLACEHOLDER + "service_principal_name": + "description": |- + PLACEHOLDER + "user_name": + "description": |- + PLACEHOLDER +resources.sql_warehouses.*.permissions.*.level: + "_": + "enum": + - |- + CAN_MANAGE + - |- + CAN_USE + - |- + CAN_MONITOR + - |- + CAN_VIEW +resources.synced_database_tables.*: + "data_synchronization_status": "description": |- PLACEHOLDER "database_instance_name": @@ -972,6 +1172,9 @@ github.com/databricks/cli/bundle/config/resources.SyncedDatabaseTable: "effective_logical_database_name": "description": |- PLACEHOLDER + "lifecycle": + "description": |- + Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. "logical_database_name": "description": |- PLACEHOLDER @@ -984,44 +1187,188 @@ github.com/databricks/cli/bundle/config/resources.SyncedDatabaseTable: "unity_catalog_provisioning_state": "description": |- PLACEHOLDER -github.com/databricks/cli/bundle/config/variable.Lookup: - "alert": +resources.volumes.*: + "_": + "markdown_description": |- + The volume resource type allows you to define and create Unity Catalog [volumes](/api/workspace/volumes/create) as part of a bundle. When deploying a bundle with a volume defined, note that: + + - A volume cannot be referenced in the `artifact_path` for the bundle until it exists in the workspace. Hence, if you want to use Databricks Asset Bundles to create the volume, you must first define the volume in the bundle, deploy it to create the volume, then reference it in the `artifact_path` in subsequent deployments. + + - Volumes in the bundle are not prepended with the `dev_${workspace.current_user.short_name}` prefix when the deployment target has `mode: development` configured. However, you can manually configure this prefix. 
See [_](/dev-tools/bundles/deployment-modes.md#custom-presets). + "grants": "description": |- - The name of the alert for which to retrieve an ID. - "cluster": + PLACEHOLDER + "lifecycle": "description": |- - The name of the cluster for which to retrieve an ID. - "cluster_policy": + Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. +root: + "artifacts": "description": |- - The name of the cluster_policy for which to retrieve an ID. - "dashboard": + Defines the attributes to build an artifact + "markdown_description": |- + Defines the attributes to build artifacts, where each key is the name of the artifact, and the value is a Map that defines the artifact build settings. For information about the `artifacts` mapping, see [\_](/dev-tools/bundles/settings.md#artifacts). + + Artifact settings defined in the top level of the bundle configuration can be overridden in the `targets` mapping. See [\_](/dev-tools/bundles/artifact-overrides.md). + "markdown_examples": |- + ```yaml + artifacts: + default: + type: whl + build: poetry build + path: . + ``` + "bundle": "description": |- - The name of the dashboard for which to retrieve an ID. - "instance_pool": + The bundle attributes when deploying to this target. + "markdown_description": |- + The bundle attributes when deploying to this target, + "environments": "description": |- - The name of the instance_pool for which to retrieve an ID. - "job": + PLACEHOLDER + "deprecation_message": |- + Deprecated: please use targets instead + "experimental": "description": |- - The name of the job for which to retrieve an ID. - "metastore": + Defines attributes for experimental features. + "include": "description": |- - The name of the metastore for which to retrieve an ID. - "notification_destination": + Specifies a list of path globs that contain configuration files to include within the bundle. 
+ "markdown_description": |- + Specifies a list of path globs that contain configuration files to include within the bundle. See [\_](/dev-tools/bundles/settings.md#include). + "permissions": "description": |- - The name of the notification_destination for which to retrieve an ID. - "pipeline": + Defines a permission for a specific entity. + "markdown_description": |- + A Sequence that defines the permissions to apply to experiments, jobs, pipelines, and models defined in the bundle, where each item in the sequence is a permission for a specific entity. + + See [\_](/dev-tools/bundles/settings.md#permissions) and [\_](/dev-tools/bundles/permissions.md). + "markdown_examples": |- + ```yaml + permissions: + - level: CAN_VIEW + group_name: test-group + - level: CAN_MANAGE + user_name: someone@example.com + - level: CAN_RUN + service_principal_name: 123456-abcdef + ``` + "presets": "description": |- - The name of the pipeline for which to retrieve an ID. - "query": + Defines bundle deployment presets. + "markdown_description": |- + Defines bundle deployment presets. See [\_](/dev-tools/bundles/deployment-modes.md#presets). + "python": "description": |- - The name of the query for which to retrieve an ID. - "service_principal": + PLACEHOLDER + "resources": "description": |- - The name of the service_principal for which to retrieve an ID. - "warehouse": + A Map that defines the resources for the bundle, where each key is the name of the resource, and the value is a Map that defines the resource. + "markdown_description": |- + A Map that defines the resources for the bundle, where each key is the name of the resource, and the value is a Map that defines the resource. For more information about Databricks Asset Bundles supported resources, and resource definition reference, see [\_](/dev-tools/bundles/resources.md). + + ```yaml + resources: + : + : + : + ``` + "run_as": "description": |- - The name of the warehouse for which to retrieve an ID. 
-github.com/databricks/cli/bundle/config/variable.TargetVariable: + The identity to use when running Databricks Asset Bundles workflows. + "markdown_description": |- + The identity to use when running Databricks Asset Bundles workflows. See [\_](/dev-tools/bundles/run-as.md). + "scripts": + "description": |- + PLACEHOLDER + "sync": + "description": |- + The files and file paths to include or exclude in the bundle. + "markdown_description": |- + The files and file paths to include or exclude in the bundle. See [\_](/dev-tools/bundles/settings.md#sync). + "targets": + "description": |- + Defines deployment targets for the bundle. + "markdown_description": |- + Defines deployment targets for the bundle. See [\_](/dev-tools/bundles/settings.md#targets) + "variables": + "description": |- + A Map that defines the custom variables for the bundle, where each key is the name of the variable, and the value is a Map that defines the variable. + "workspace": + "description": |- + Defines the Databricks workspace for the bundle. + "markdown_description": |- + Defines the Databricks workspace for the bundle. See [\_](/dev-tools/bundles/settings.md#workspace). +run_as: + "service_principal_name": + "description": |- + The application ID of an active service principal. Setting this field requires the `servicePrincipal/user` role. + "user_name": + "description": |- + The email of an active workspace user. Non-admin users can only set this field to their own email. +scripts.*: + "content": + "description": |- + PLACEHOLDER +sync: + "exclude": + "description": |- + A list of files or folders to exclude from the bundle. + "include": + "description": |- + A list of files or folders to include in the bundle. + "paths": + "description": |- + The local folder paths, which can be outside the bundle root, to synchronize to the workspace when the bundle is deployed. +targets.*: + "artifacts": + "description": |- + The artifacts to include in the target deployment. 
+ "bundle": + "description": |- + The bundle attributes when deploying to this target. + "cluster_id": + "description": |- + The ID of the cluster to use for this target. + "compute_id": + "description": |- + Deprecated. The ID of the compute to use for this target. + "deprecation_message": |- + Deprecated: please use cluster_id instead + "default": + "description": |- + Whether this target is the default target. + "git": + "description": |- + The Git version control settings for the target. + "mode": + "description": |- + The deployment mode for the target. + "markdown_description": |- + The deployment mode for the target. Valid values are `development` or `production`. See [\_](/dev-tools/bundles/deployment-modes.md). + "permissions": + "description": |- + The permissions for deploying and running the bundle in the target. + "presets": + "description": |- + The deployment presets for the target. + "resources": + "description": |- + The resource definitions for the target. + "run_as": + "description": |- + The identity to use to run the bundle. + "markdown_description": |- + The identity to use to run the bundle, see [\_](/dev-tools/bundles/run-as.md). + "sync": + "description": |- + The local paths to sync to the target workspace when a bundle is run or deployed. + "variables": + "description": |- + The custom variable definitions for the target. + "workspace": + "description": |- + The Databricks workspace for the target. +targets.*.variables.*: "default": "description": |- The default value for the variable. @@ -1037,7 +1384,7 @@ github.com/databricks/cli/bundle/config/variable.TargetVariable: "type": "description": |- The type of the variable. -github.com/databricks/cli/bundle/config/variable.Variable: +variables.*: "_": "description": |- Defines a custom variable for the bundle. @@ -1057,10 +1404,95 @@ github.com/databricks/cli/bundle/config/variable.Variable: "type": "description": |- The type of the variable. 
-github.com/databricks/databricks-sdk-go/service/jobs.JobRunAs: - "service_principal_name": +variables.*.lookup: + "alert": "description": |- - The application ID of an active service principal. Setting this field requires the `servicePrincipal/user` role. - "user_name": + The name of the alert for which to retrieve an ID. + "cluster": "description": |- - The email of an active workspace user. Non-admin users can only set this field to their own email. + The name of the cluster for which to retrieve an ID. + "cluster_policy": + "description": |- + The name of the cluster_policy for which to retrieve an ID. + "dashboard": + "description": |- + The name of the dashboard for which to retrieve an ID. + "instance_pool": + "description": |- + The name of the instance_pool for which to retrieve an ID. + "job": + "description": |- + The name of the job for which to retrieve an ID. + "metastore": + "description": |- + The name of the metastore for which to retrieve an ID. + "notification_destination": + "description": |- + The name of the notification_destination for which to retrieve an ID. + "pipeline": + "description": |- + The name of the pipeline for which to retrieve an ID. + "query": + "description": |- + The name of the query for which to retrieve an ID. + "service_principal": + "description": |- + The name of the service_principal for which to retrieve an ID. + "warehouse": + "description": |- + The name of the warehouse for which to retrieve an ID. +workspace: + "artifact_path": + "description": |- + The artifact path to use within the workspace for both deployments and workflow runs + "auth_type": + "description": |- + The authentication type. 
+ "azure_client_id": + "description": |- + The Azure client ID + "azure_environment": + "description": |- + The Azure environment + "azure_login_app_id": + "description": |- + The Azure login app ID + "azure_tenant_id": + "description": |- + The Azure tenant ID + "azure_use_msi": + "description": |- + Whether to use MSI for Azure + "azure_workspace_resource_id": + "description": |- + The Azure workspace resource ID + "client_id": + "description": |- + The client ID for the workspace + "experimental_is_unified_host": + "description": |- + Experimental feature flag to indicate if the host is a unified host + "file_path": + "description": |- + The file path to use within the workspace for both deployments and workflow runs + "google_service_account": + "description": |- + The Google service account name + "host": + "description": |- + The Databricks workspace host URL + "profile": + "description": |- + The Databricks workspace profile name + "resource_path": + "description": |- + The workspace resource path + "root_path": + "description": |- + The Databricks workspace root path + "state_path": + "description": |- + The workspace state path + "workspace_id": + "description": |- + The Databricks workspace ID diff --git a/bundle/internal/schema/annotations_openapi.yml b/bundle/internal/schema/annotations_openapi.yml deleted file mode 100644 index 882f5c1cf2..0000000000 --- a/bundle/internal/schema/annotations_openapi.yml +++ /dev/null @@ -1,5699 +0,0 @@ -# This file is auto-generated. DO NOT EDIT. -github.com/databricks/cli/bundle/config/resources.Alert: - "create_time": - "description": |- - The timestamp indicating when the alert was created. - "x-databricks-field-behaviors_output_only": |- - true - "custom_description": - "description": |- - Custom description for the alert. support mustache template. - "custom_summary": - "description": |- - Custom summary for the alert. support mustache template. - "display_name": - "description": |- - The display name of the alert. 
- "effective_run_as": - "description": |- - The actual identity that will be used to execute the alert. - This is an output-only field that shows the resolved run-as identity after applying - permissions and defaults. - "x-databricks-field-behaviors_output_only": |- - true - "evaluation": {} - "id": - "description": |- - UUID identifying the alert. - "x-databricks-field-behaviors_output_only": |- - true - "lifecycle_state": - "description": |- - Indicates whether the query is trashed. - "x-databricks-field-behaviors_output_only": |- - true - "owner_user_name": - "description": |- - The owner's username. This field is set to "Unavailable" if the user has been deleted. - "x-databricks-field-behaviors_output_only": |- - true - "parent_path": - "description": |- - The workspace path of the folder containing the alert. Can only be set on create, and cannot be updated. - "query_text": - "description": |- - Text of the query to be run. - "run_as": - "description": |- - Specifies the identity that will be used to run the alert. - This field allows you to configure alerts to run as a specific user or service principal. - - For user identity: Set `user_name` to the email of an active workspace user. Users can only set this to their own email. - - For service principal: Set `service_principal_name` to the application ID. Requires the `servicePrincipal/user` role. - If not specified, the alert will run as the request user. - "run_as_user_name": - "description": |- - The run as username or application ID of service principal. - On Create and Update, this field can be set to application ID of an active service principal. Setting this field requires the servicePrincipal/user role. - Deprecated: Use `run_as` field instead. This field will be removed in a future release. - "deprecation_message": |- - This field is deprecated - "schedule": {} - "update_time": - "description": |- - The timestamp indicating when the alert was updated. 
- "x-databricks-field-behaviors_output_only": |- - true - "warehouse_id": - "description": |- - ID of the SQL warehouse attached to the alert. -github.com/databricks/cli/bundle/config/resources.App: - "active_deployment": - "description": |- - The active deployment of the app. A deployment is considered active when it has been deployed - to the app compute. - "x-databricks-field-behaviors_output_only": |- - true - "app_status": - "x-databricks-field-behaviors_output_only": |- - true - "budget_policy_id": {} - "compute_size": {} - "compute_status": - "x-databricks-field-behaviors_output_only": |- - true - "create_time": - "description": |- - The creation time of the app. Formatted timestamp in ISO 6801. - "x-databricks-field-behaviors_output_only": |- - true - "creator": - "description": |- - The email of the user that created the app. - "x-databricks-field-behaviors_output_only": |- - true - "default_source_code_path": - "description": |- - The default workspace file system path of the source code from which app deployment are - created. This field tracks the workspace source code path of the last active deployment. - "x-databricks-field-behaviors_output_only": |- - true - "description": - "description": |- - The description of the app. - "effective_budget_policy_id": - "x-databricks-field-behaviors_output_only": |- - true - "effective_usage_policy_id": - "x-databricks-field-behaviors_output_only": |- - true - "effective_user_api_scopes": - "description": |- - The effective api scopes granted to the user access token. - "x-databricks-field-behaviors_output_only": |- - true - "git_repository": - "description": |- - Git repository configuration for app deployments. When specified, deployments can - reference code from this repository by providing only the git reference (branch, tag, or commit). - "x-databricks-preview": |- - PRIVATE - "id": - "description": |- - The unique identifier of the app. 
- "x-databricks-field-behaviors_output_only": |- - true - "name": - "description": |- - The name of the app. The name must contain only lowercase alphanumeric characters and hyphens. - It must be unique within the workspace. - "oauth2_app_client_id": - "x-databricks-field-behaviors_output_only": |- - true - "oauth2_app_integration_id": - "x-databricks-field-behaviors_output_only": |- - true - "pending_deployment": - "description": |- - The pending deployment of the app. A deployment is considered pending when it is being prepared - for deployment to the app compute. - "x-databricks-field-behaviors_output_only": |- - true - "resources": - "description": |- - Resources for the app. - "service_principal_client_id": - "x-databricks-field-behaviors_output_only": |- - true - "service_principal_id": - "x-databricks-field-behaviors_output_only": |- - true - "service_principal_name": - "x-databricks-field-behaviors_output_only": |- - true - "space": - "description": |- - Name of the space this app belongs to. - "x-databricks-preview": |- - PRIVATE - "update_time": - "description": |- - The update time of the app. Formatted timestamp in ISO 6801. - "x-databricks-field-behaviors_output_only": |- - true - "updater": - "description": |- - The email of the user that last updated the app. - "x-databricks-field-behaviors_output_only": |- - true - "url": - "description": |- - The URL of the app once it is deployed. - "x-databricks-field-behaviors_output_only": |- - true - "usage_policy_id": {} - "user_api_scopes": {} -github.com/databricks/cli/bundle/config/resources.Catalog: - "comment": - "description": |- - User-provided free-form text description. - "connection_name": - "description": |- - The name of the connection to an external data source. - "name": - "description": |- - Name of catalog. - "options": - "description": |- - A map of key-value properties attached to the securable. - "properties": - "description": |- - A map of key-value properties attached to the securable. 
- "provider_name": - "description": |- - The name of delta sharing provider. - - A Delta Sharing catalog is a catalog that is based on a Delta share on a remote sharing server. - "share_name": - "description": |- - The name of the share under the share provider. - "storage_root": - "description": |- - Storage root URL for managed tables within catalog. -github.com/databricks/cli/bundle/config/resources.Cluster: - "_": - "description": |- - Contains a snapshot of the latest user specified settings that were used to create/edit the cluster. - "apply_policy_default_values": - "description": |- - When set to true, fixed and default values from the policy will be used for fields that are omitted. When set to false, only fixed values from the policy will be applied. - "autoscale": - "description": |- - Parameters needed in order to automatically scale clusters up and down based on load. - Note: autoscaling works best with DB runtime versions 3.0 or later. - "autotermination_minutes": - "description": |- - Automatically terminates the cluster after it is inactive for this time in minutes. If not set, - this cluster will not be automatically terminated. If specified, the threshold must be between - 10 and 10000 minutes. - Users can also set this value to 0 to explicitly disable automatic termination. - "aws_attributes": - "description": |- - Attributes related to clusters running on Amazon Web Services. - If not specified at cluster creation, a set of default values will be used. - "azure_attributes": - "description": |- - Attributes related to clusters running on Microsoft Azure. - If not specified at cluster creation, a set of default values will be used. - "cluster_log_conf": - "description": |- - The configuration for delivering spark logs to a long-term storage destination. - Three kinds of destinations (DBFS, S3 and Unity Catalog volumes) are supported. Only one destination can be specified - for one cluster. 
If the conf is given, the logs will be delivered to the destination every - `5 mins`. The destination of driver logs is `$destination/$clusterId/driver`, while - the destination of executor logs is `$destination/$clusterId/executor`. - "cluster_name": - "description": |- - Cluster name requested by the user. This doesn't have to be unique. - If not specified at creation, the cluster name will be an empty string. - For job clusters, the cluster name is automatically set based on the job and job run IDs. - "custom_tags": - "description": |- - Additional tags for cluster resources. Databricks will tag all cluster resources (e.g., AWS - instances and EBS volumes) with these tags in addition to `default_tags`. Notes: - - - Currently, Databricks allows at most 45 custom tags - - - Clusters can only reuse cloud resources if the resources' tags are a subset of the cluster tags - "data_security_mode": - "description": |- - Data security mode decides what data governance model to use when accessing data - from a cluster. - - The following modes can only be used when `kind = CLASSIC_PREVIEW`. - * `DATA_SECURITY_MODE_AUTO`: Databricks will choose the most appropriate access mode depending on your compute configuration. - * `DATA_SECURITY_MODE_STANDARD`: Alias for `USER_ISOLATION`. - * `DATA_SECURITY_MODE_DEDICATED`: Alias for `SINGLE_USER`. - - The following modes can be used regardless of `kind`. - * `NONE`: No security isolation for multiple users sharing the cluster. Data governance features are not available in this mode. - * `SINGLE_USER`: A secure cluster that can only be exclusively used by a single user specified in `single_user_name`. Most programming languages, cluster features and data governance features are available in this mode. - * `USER_ISOLATION`: A secure cluster that can be shared by multiple users. Cluster users are fully isolated so that they cannot see each other's data and credentials. Most data governance features are supported in this mode. 
But programming languages and cluster features might be limited. - - The following modes are deprecated starting with Databricks Runtime 15.0 and - will be removed for future Databricks Runtime versions: - - * `LEGACY_TABLE_ACL`: This mode is for users migrating from legacy Table ACL clusters. - * `LEGACY_PASSTHROUGH`: This mode is for users migrating from legacy Passthrough on high concurrency clusters. - * `LEGACY_SINGLE_USER`: This mode is for users migrating from legacy Passthrough on standard clusters. - * `LEGACY_SINGLE_USER_STANDARD`: This mode provides a way that doesn’t have UC nor passthrough enabled. - "docker_image": - "description": |- - Custom docker image BYOC - "driver_instance_pool_id": - "description": |- - The optional ID of the instance pool for the driver of the cluster belongs. - The pool cluster uses the instance pool with id (instance_pool_id) if the driver pool is not - assigned. - "driver_node_type_flexibility": - "description": |- - Flexible node type configuration for the driver node. - "driver_node_type_id": - "description": |- - The node type of the Spark driver. - Note that this field is optional; if unset, the driver node type will be set as the same value - as `node_type_id` defined above. - - This field, along with node_type_id, should not be set if virtual_cluster_size is set. - If both driver_node_type_id, node_type_id, and virtual_cluster_size are specified, driver_node_type_id and node_type_id take precedence. - "enable_elastic_disk": - "description": |- - Autoscaling Local Storage: when enabled, this cluster will dynamically acquire additional disk - space when its Spark workers are running low on disk space. - "enable_local_disk_encryption": - "description": |- - Whether to enable LUKS on cluster VMs' local disks - "gcp_attributes": - "description": |- - Attributes related to clusters running on Google Cloud Platform. - If not specified at cluster creation, a set of default values will be used. 
- "init_scripts": - "description": |- - The configuration for storing init scripts. Any number of destinations can be specified. - The scripts are executed sequentially in the order provided. - If `cluster_log_conf` is specified, init script logs are sent to `//init_scripts`. - "instance_pool_id": - "description": |- - The optional ID of the instance pool to which the cluster belongs. - "is_single_node": - "description": |- - This field can only be used when `kind = CLASSIC_PREVIEW`. - - When set to true, Databricks will automatically set single node related `custom_tags`, `spark_conf`, and `num_workers` - "kind": - "description": |- - The kind of compute described by this compute specification. - - Depending on `kind`, different validations and default values will be applied. - - Clusters with `kind = CLASSIC_PREVIEW` support the following fields, whereas clusters with no specified `kind` do not. - * [is_single_node](/api/workspace/clusters/create#is_single_node) - * [use_ml_runtime](/api/workspace/clusters/create#use_ml_runtime) - * [data_security_mode](/api/workspace/clusters/create#data_security_mode) set to `DATA_SECURITY_MODE_AUTO`, `DATA_SECURITY_MODE_DEDICATED`, or `DATA_SECURITY_MODE_STANDARD` - - By using the [simple form](https://docs.databricks.com/compute/simple-form.html), your clusters are automatically using `kind = CLASSIC_PREVIEW`. - "node_type_id": - "description": |- - This field encodes, through a single value, the resources available to each of - the Spark nodes in this cluster. For example, the Spark nodes can be provisioned - and optimized for memory or compute intensive workloads. A list of available node - types can be retrieved by using the :method:clusters/listNodeTypes API call. - "num_workers": - "description": |- - Number of worker nodes that this cluster should have. A cluster has one Spark Driver - and `num_workers` Executors for a total of `num_workers` + 1 Spark nodes. 
- - Note: When reading the properties of a cluster, this field reflects the desired number - of workers rather than the actual current number of workers. For instance, if a cluster - is resized from 5 to 10 workers, this field will immediately be updated to reflect - the target size of 10 workers, whereas the workers listed in `spark_info` will gradually - increase from 5 to 10 as the new nodes are provisioned. - "policy_id": - "description": |- - The ID of the cluster policy used to create the cluster if applicable. - "remote_disk_throughput": - "description": |- - If set, what the configurable throughput (in Mb/s) for the remote disk is. Currently only supported for GCP HYPERDISK_BALANCED disks. - "runtime_engine": - "description": |- - Determines the cluster's runtime engine, either standard or Photon. - - This field is not compatible with legacy `spark_version` values that contain `-photon-`. - Remove `-photon-` from the `spark_version` and set `runtime_engine` to `PHOTON`. - - If left unspecified, the runtime engine defaults to standard unless the spark_version - contains -photon-, in which case Photon will be used. - "single_user_name": - "description": |- - Single user name if data_security_mode is `SINGLE_USER` - "spark_conf": - "description": |- - An object containing a set of optional, user-specified Spark configuration key-value pairs. - Users can also pass in a string of extra JVM options to the driver and the executors via - `spark.driver.extraJavaOptions` and `spark.executor.extraJavaOptions` respectively. - "spark_env_vars": - "description": |- - An object containing a set of optional, user-specified environment variable key-value pairs. - Please note that key-value pair of the form (X,Y) will be exported as is (i.e., - `export X='Y'`) while launching the driver and workers. - - In order to specify an additional set of `SPARK_DAEMON_JAVA_OPTS`, we recommend appending - them to `$SPARK_DAEMON_JAVA_OPTS` as shown in the example below. 
This ensures that all - default databricks managed environmental variables are included as well. - - Example Spark environment variables: - `{"SPARK_WORKER_MEMORY": "28000m", "SPARK_LOCAL_DIRS": "/local_disk0"}` or - `{"SPARK_DAEMON_JAVA_OPTS": "$SPARK_DAEMON_JAVA_OPTS -Dspark.shuffle.service.enabled=true"}` - "spark_version": - "description": |- - The Spark version of the cluster, e.g. `3.3.x-scala2.11`. - A list of available Spark versions can be retrieved by using - the :method:clusters/sparkVersions API call. - "ssh_public_keys": - "description": |- - SSH public key contents that will be added to each Spark node in this cluster. The - corresponding private keys can be used to login with the user name `ubuntu` on port `2200`. - Up to 10 keys can be specified. - "total_initial_remote_disk_size": - "description": |- - If set, what the total initial volume size (in GB) of the remote disks should be. Currently only supported for GCP HYPERDISK_BALANCED disks. - "use_ml_runtime": - "description": |- - This field can only be used when `kind = CLASSIC_PREVIEW`. - - `effective_spark_version` is determined by `spark_version` (DBR release), this field `use_ml_runtime`, and whether `node_type_id` is gpu node or not. - "worker_node_type_flexibility": - "description": |- - Flexible node type configuration for worker nodes. - "workload_type": - "description": |- - Cluster Attributes showing for clusters workload types. -github.com/databricks/cli/bundle/config/resources.DatabaseCatalog: - "create_database_if_not_exists": {} - "database_instance_name": - "description": |- - The name of the DatabaseInstance housing the database. - "database_name": - "description": |- - The name of the database (in a instance) associated with the catalog. - "name": - "description": |- - The name of the catalog in UC. 
- "uid": - "x-databricks-field-behaviors_output_only": |- - true -github.com/databricks/cli/bundle/config/resources.DatabaseInstance: - "_": - "description": |- - A DatabaseInstance represents a logical Postgres instance, comprised of both compute and storage. - "capacity": - "description": |- - The sku of the instance. Valid values are "CU_1", "CU_2", "CU_4", "CU_8". - "child_instance_refs": - "description": |- - The refs of the child instances. This is only available if the instance is - parent instance. - "x-databricks-field-behaviors_output_only": |- - true - "creation_time": - "description": |- - The timestamp when the instance was created. - "x-databricks-field-behaviors_output_only": |- - true - "creator": - "description": |- - The email of the creator of the instance. - "x-databricks-field-behaviors_output_only": |- - true - "custom_tags": - "description": |- - Custom tags associated with the instance. This field is only included on create and update responses. - "effective_capacity": - "description": |- - Deprecated. The sku of the instance; this field will always match the value of capacity. - This is an output only field that contains the value computed from the input field combined with - server side defaults. Use the field without the effective_ prefix to set the value. - "deprecation_message": |- - This field is deprecated - "x-databricks-field-behaviors_output_only": |- - true - "effective_custom_tags": - "description": |- - The recorded custom tags associated with the instance. - This is an output only field that contains the value computed from the input field combined with - server side defaults. Use the field without the effective_ prefix to set the value. - "x-databricks-field-behaviors_output_only": |- - true - "effective_enable_pg_native_login": - "description": |- - Whether the instance has PG native password login enabled. 
- This is an output only field that contains the value computed from the input field combined with - server side defaults. Use the field without the effective_ prefix to set the value. - "x-databricks-field-behaviors_output_only": |- - true - "effective_enable_readable_secondaries": - "description": |- - Whether secondaries serving read-only traffic are enabled. Defaults to false. - This is an output only field that contains the value computed from the input field combined with - server side defaults. Use the field without the effective_ prefix to set the value. - "x-databricks-field-behaviors_output_only": |- - true - "effective_node_count": - "description": |- - The number of nodes in the instance, composed of 1 primary and 0 or more secondaries. Defaults to - 1 primary and 0 secondaries. - This is an output only field that contains the value computed from the input field combined with - server side defaults. Use the field without the effective_ prefix to set the value. - "x-databricks-field-behaviors_output_only": |- - true - "effective_retention_window_in_days": - "description": |- - The retention window for the instance. This is the time window in days - for which the historical data is retained. - This is an output only field that contains the value computed from the input field combined with - server side defaults. Use the field without the effective_ prefix to set the value. - "x-databricks-field-behaviors_output_only": |- - true - "effective_stopped": - "description": |- - Whether the instance is stopped. - This is an output only field that contains the value computed from the input field combined with - server side defaults. Use the field without the effective_ prefix to set the value. - "x-databricks-field-behaviors_output_only": |- - true - "effective_usage_policy_id": - "description": |- - The policy that is applied to the instance. 
- This is an output only field that contains the value computed from the input field combined with - server side defaults. Use the field without the effective_ prefix to set the value. - "x-databricks-field-behaviors_output_only": |- - true - "enable_pg_native_login": - "description": |- - Whether to enable PG native password login on the instance. Defaults to false. - "enable_readable_secondaries": - "description": |- - Whether to enable secondaries to serve read-only traffic. Defaults to false. - "name": - "description": |- - The name of the instance. This is the unique identifier for the instance. - "node_count": - "description": |- - The number of nodes in the instance, composed of 1 primary and 0 or more secondaries. Defaults to - 1 primary and 0 secondaries. This field is input only, see effective_node_count for the output. - "parent_instance_ref": - "description": |- - The ref of the parent instance. This is only available if the instance is - child instance. - Input: For specifying the parent instance to create a child instance. Optional. - Output: Only populated if provided as input to create a child instance. - "pg_version": - "description": |- - The version of Postgres running on the instance. - "x-databricks-field-behaviors_output_only": |- - true - "read_only_dns": - "description": |- - The DNS endpoint to connect to the instance for read only access. This is only available if - enable_readable_secondaries is true. - "x-databricks-field-behaviors_output_only": |- - true - "read_write_dns": - "description": |- - The DNS endpoint to connect to the instance for read+write access. - "x-databricks-field-behaviors_output_only": |- - true - "retention_window_in_days": - "description": |- - The retention window for the instance. This is the time window in days - for which the historical data is retained. The default value is 7 days. - Valid values are 2 to 35 days. - "state": - "description": |- - The current state of the instance. 
- "x-databricks-field-behaviors_output_only": |- - true - "stopped": - "description": |- - Whether to stop the instance. An input only param, see effective_stopped for the output. - "uid": - "description": |- - An immutable UUID identifier for the instance. - "x-databricks-field-behaviors_output_only": |- - true - "usage_policy_id": - "description": |- - The desired usage policy to associate with the instance. -github.com/databricks/cli/bundle/config/resources.ExternalLocation: - "comment": - "description": |- - User-provided free-form text description. - "credential_name": - "description": |- - Name of the storage credential used with this location. - "effective_enable_file_events": - "description": |- - The effective value of `enable_file_events` after applying server-side defaults. - "x-databricks-field-behaviors_output_only": |- - true - "enable_file_events": - "description": |- - Whether to enable file events on this external location. Default to `true`. Set to `false` to disable file events. - The actual applied value may differ due to server-side defaults; check `effective_enable_file_events` for the effective state. - "encryption_details": - "description": |- - Encryption options that apply to clients connecting to cloud storage. - "fallback": - "description": |- - Indicates whether fallback mode is enabled for this external location. When fallback mode is enabled, the access to the location falls back to cluster credentials if UC credentials are not sufficient. - "file_event_queue": - "description": |- - File event queue settings. If `enable_file_events` is not `false`, must be defined and have exactly one of the documented properties. - "name": - "description": |- - Name of the external location. - "read_only": - "description": |- - Indicates whether the external location is read-only. - "skip_validation": - "description": |- - Skips validation of the storage credential associated with the external location. 
- "url": - "description": |- - Path URL of the external location. -github.com/databricks/cli/bundle/config/resources.Job: - "budget_policy_id": - "description": |- - The id of the user specified budget policy to use for this job. - If not specified, a default budget policy may be applied when creating or modifying the job. - See `effective_budget_policy_id` for the budget policy used by this workload. - "continuous": - "description": |- - An optional continuous property for this job. The continuous property will ensure that there is always one run executing. Only one of `schedule` and `continuous` can be used. - "deployment": - "description": |- - Deployment information for jobs managed by external sources. - "description": - "description": |- - An optional description for the job. The maximum length is 27700 characters in UTF-8 encoding. - "edit_mode": - "description": |- - Edit mode of the job. - - * `UI_LOCKED`: The job is in a locked UI state and cannot be modified. - * `EDITABLE`: The job is in an editable state and can be modified. - "email_notifications": - "description": |- - An optional set of email addresses that is notified when runs of this job begin or complete as well as when this job is deleted. - "environments": - "description": |- - A list of task execution environment specifications that can be referenced by serverless tasks of this job. - For serverless notebook tasks, if the environment_key is not specified, the notebook environment will be used if present. If a jobs environment is specified, it will override the notebook environment. - For other serverless tasks, the task environment is required to be specified using environment_key in the task settings. - "format": - "description": |- - Used to tell what is the format of the job. This field is ignored in Create/Update/Reset calls. When using the Jobs API 2.1 this value is always set to `"MULTI_TASK"`. 
- "deprecation_message": |- - This field is deprecated - "git_source": - "description": |- - An optional specification for a remote Git repository containing the source code used by tasks. Version-controlled source code is supported by notebook, dbt, Python script, and SQL File tasks. - - If `git_source` is set, these tasks retrieve the file from the remote repository by default. However, this behavior can be overridden by setting `source` to `WORKSPACE` on the task. - - Note: dbt and SQL File tasks support only version-controlled sources. If dbt or SQL File tasks are used, `git_source` must be defined on the job. - "health": - "description": |- - An optional set of health rules that can be defined for this job. - "job_clusters": - "description": |- - A list of job cluster specifications that can be shared and reused by tasks of this job. Libraries cannot be declared in a shared job cluster. You must declare dependent libraries in task settings. - "max_concurrent_runs": - "description": |- - An optional maximum allowed number of concurrent runs of the job. - Set this value if you want to be able to execute multiple runs of the same job concurrently. - This is useful for example if you trigger your job on a frequent schedule and want to allow consecutive runs to overlap with each other, or if you want to trigger multiple runs which differ by their input parameters. - This setting affects only new runs. For example, suppose the job’s concurrency is 4 and there are 4 concurrent active runs. Then setting the concurrency to 3 won’t kill any of the active runs. - However, from then on, new runs are skipped unless there are fewer than 3 active runs. - This value cannot exceed 1000. Setting this value to `0` causes all new runs to be skipped. - "name": - "description": |- - An optional name for the job. The maximum length is 4096 bytes in UTF-8 encoding. 
- "notification_settings": - "description": |- - Optional notification settings that are used when sending notifications to each of the `email_notifications` and `webhook_notifications` for this job. - "parameters": - "description": |- - Job-level parameter definitions - "performance_target": - "description": |- - The performance mode on a serverless job. This field determines the level of compute performance or cost-efficiency for the run. - The performance target does not apply to tasks that run on Serverless GPU compute. - - * `STANDARD`: Enables cost-efficient execution of serverless workloads. - * `PERFORMANCE_OPTIMIZED`: Prioritizes fast startup and execution times through rapid scaling and optimized cluster performance. - "queue": - "description": |- - The queue settings of the job. - "run_as": - "description": |- - The user or service principal that the job runs as, if specified in the request. - This field indicates the explicit configuration of `run_as` for the job. - To find the value in all cases, explicit or implicit, use `run_as_user_name`. - "schedule": - "description": |- - An optional periodic schedule for this job. The default behavior is that the job only runs when triggered by clicking “Run Now” in the Jobs UI or sending an API request to `runNow`. - "tags": - "description": |- - A map of tags associated with the job. These are forwarded to the cluster as cluster tags for jobs clusters, and are subject to the same limitations as cluster tags. A maximum of 25 tags can be added to the job. - "tasks": - "description": |- - A list of task specifications to be executed by this job. - It supports up to 1000 elements in write endpoints (:method:jobs/create, :method:jobs/reset, :method:jobs/update, :method:jobs/submit). - Read endpoints return only 100 tasks. If more than 100 tasks are available, you can paginate through them using :method:jobs/get. Use the `next_page_token` field at the object root to determine if more results are available. 
- "timeout_seconds": - "description": |- - An optional timeout applied to each run of this job. A value of `0` means no timeout. - "trigger": - "description": |- - A configuration to trigger a run when certain conditions are met. The default behavior is that the job runs only when triggered by clicking “Run Now” in the Jobs UI or sending an API request to `runNow`. - "usage_policy_id": - "description": |- - The id of the user specified usage policy to use for this job. - If not specified, a default usage policy may be applied when creating or modifying the job. - See `effective_usage_policy_id` for the usage policy used by this workload. - "x-databricks-preview": |- - PRIVATE - "webhook_notifications": - "description": |- - A collection of system notification IDs to notify when runs of this job begin or complete. -github.com/databricks/cli/bundle/config/resources.MlflowExperiment: - "artifact_location": - "description": |- - Location where all artifacts for the experiment are stored. - If not provided, the remote server will select an appropriate default. - "name": - "description": |- - Experiment name. - "tags": - "description": |- - A collection of tags to set on the experiment. Maximum tag size and number of tags per request - depends on the storage backend. All storage backends are guaranteed to support tag keys up - to 250 bytes in size and tag values up to 5000 bytes in size. All storage backends are also - guaranteed to support up to 20 tags per request. -github.com/databricks/cli/bundle/config/resources.MlflowModel: - "description": - "description": |- - Optional description for registered model. - "name": - "description": |- - Register models under this name - "tags": - "description": |- - Additional metadata for registered model. -github.com/databricks/cli/bundle/config/resources.ModelServingEndpoint: - "ai_gateway": - "description": |- - The AI Gateway configuration for the serving endpoint. 
NOTE: External model, provisioned throughput, and pay-per-token endpoints are fully supported; agent endpoints currently only support inference tables. - "budget_policy_id": - "description": |- - The budget policy to be applied to the serving endpoint. - "config": - "description": |- - The core config of the serving endpoint. - "description": {} - "email_notifications": - "description": |- - Email notification settings. - "name": - "description": |- - The name of the serving endpoint. This field is required and must be unique across a Databricks workspace. - An endpoint name can consist of alphanumeric characters, dashes, and underscores. - "rate_limits": - "description": |- - Rate limits to be applied to the serving endpoint. NOTE: this field is deprecated, please use AI Gateway to manage rate limits. - "deprecation_message": |- - This field is deprecated - "route_optimized": - "description": |- - Enable route optimization for the serving endpoint. - "tags": - "description": |- - Tags to be attached to the serving endpoint and automatically propagated to billing logs. -github.com/databricks/cli/bundle/config/resources.Pipeline: - "allow_duplicate_names": - "description": |- - If false, deployment will fail if name conflicts with that of another pipeline. - "budget_policy_id": - "description": |- - Budget policy of this pipeline. - "catalog": - "description": |- - A catalog in Unity Catalog to publish data from this pipeline to. If `target` is specified, tables in this pipeline are published to a `target` schema inside `catalog` (for example, `catalog`.`target`.`table`). If `target` is not specified, no data is published to Unity Catalog. - "channel": - "description": |- - DLT Release Channel that specifies which version to use. - "clusters": - "description": |- - Cluster settings for this pipeline deployment. - "configuration": - "description": |- - String-String configuration for this pipeline execution. 
- "continuous": - "description": |- - Whether the pipeline is continuous or triggered. This replaces `trigger`. - "deployment": - "description": |- - Deployment type of this pipeline. - "development": - "description": |- - Whether the pipeline is in Development mode. Defaults to false. - "dry_run": {} - "edition": - "description": |- - Pipeline product edition. - "environment": - "description": |- - Environment specification for this pipeline used to install dependencies. - "event_log": - "description": |- - Event log configuration for this pipeline - "filters": - "description": |- - Filters on which Pipeline packages to include in the deployed graph. - "gateway_definition": - "description": |- - The definition of a gateway pipeline to support change data capture. - "x-databricks-preview": |- - PRIVATE - "id": - "description": |- - Unique identifier for this pipeline. - "ingestion_definition": - "description": |- - The configuration for a managed ingestion pipeline. These settings cannot be used with the 'libraries', 'schema', 'target', or 'catalog' settings. - "libraries": - "description": |- - Libraries or code needed by this deployment. - "name": - "description": |- - Friendly identifier for this pipeline. - "notifications": - "description": |- - List of notification settings for this pipeline. - "photon": - "description": |- - Whether Photon is enabled for this pipeline. - "restart_window": - "description": |- - Restart window of this pipeline. - "x-databricks-preview": |- - PRIVATE - "root_path": - "description": |- - Root path for this pipeline. - This is used as the root directory when editing the pipeline in the Databricks user interface and it is - added to sys.path when executing Python sources during pipeline execution. - "run_as": - "description": |- - Write-only setting, available only in Create/Update calls. Specifies the user or service principal that the pipeline runs as. If not specified, the pipeline runs as the user who created the pipeline. 
- - Only `user_name` or `service_principal_name` can be specified. If both are specified, an error is thrown. - "schema": - "description": |- - The default schema (database) where tables are read from or published to. - "serverless": - "description": |- - Whether serverless compute is enabled for this pipeline. - "storage": - "description": |- - DBFS root directory for storing checkpoints and tables. - "tags": - "description": |- - A map of tags associated with the pipeline. - These are forwarded to the cluster as cluster tags, and are therefore subject to the same limitations. - A maximum of 25 tags can be added to the pipeline. - "target": - "description": |- - Target schema (database) to add tables in this pipeline to. Exactly one of `schema` or `target` must be specified. To publish to Unity Catalog, also specify `catalog`. This legacy field is deprecated for pipeline creation in favor of the `schema` field. - "deprecation_message": |- - This field is deprecated - "trigger": - "description": |- - Which pipeline trigger to use. Deprecated: Use `continuous` instead. - "deprecation_message": |- - This field is deprecated - "usage_policy_id": - "description": |- - Usage policy of this pipeline. - "x-databricks-preview": |- - PRIVATE -github.com/databricks/cli/bundle/config/resources.QualityMonitor: - "assets_dir": - "description": |- - [Create:REQ Update:IGN] Field for specifying the absolute path to a custom directory to store data-monitoring - assets. Normally prepopulated to a default user location via UI and Python APIs. - "baseline_table_name": - "description": |- - [Create:OPT Update:OPT] Baseline table name. - Baseline data is used to compute drift from the data in the monitored `table_name`. - The baseline table and the monitored table shall have the same schema. - "custom_metrics": - "description": |- - [Create:OPT Update:OPT] Custom metrics. - "data_classification_config": - "description": |- - [Create:OPT Update:OPT] Data classification related config. 
- "x-databricks-preview": |- - PRIVATE - "inference_log": {} - "latest_monitor_failure_msg": - "description": |- - [Create:ERR Update:IGN] The latest error message for a monitor failure. - "notifications": - "description": |- - [Create:OPT Update:OPT] Field for specifying notification settings. - "output_schema_name": - "description": |- - [Create:REQ Update:REQ] Schema where output tables are created. Needs to be in 2-level format {catalog}.{schema} - "schedule": - "description": |- - [Create:OPT Update:OPT] The monitor schedule. - "skip_builtin_dashboard": - "description": |- - Whether to skip creating a default dashboard summarizing data quality metrics. - "slicing_exprs": - "description": |- - [Create:OPT Update:OPT] List of column expressions to slice data with for targeted analysis. The data is grouped by - each expression independently, resulting in a separate slice for each predicate and its - complements. For example `slicing_exprs=[“col_1”, “col_2 > 10”]` will generate the following - slices: two slices for `col_2 > 10` (True and False), and one slice per unique value in - `col1`. For high-cardinality columns, only the top 100 unique values by frequency will - generate slices. - "snapshot": - "description": |- - Configuration for monitoring snapshot tables. - "time_series": - "description": |- - Configuration for monitoring time series tables. - "warehouse_id": - "description": |- - Optional argument to specify the warehouse for dashboard creation. If not specified, the first running - warehouse will be used. -github.com/databricks/cli/bundle/config/resources.RegisteredModel: - "aliases": - "description": |- - List of aliases associated with the registered model - "browse_only": - "description": |- - Indicates whether the principal is limited to retrieving metadata for the associated object through the BROWSE privilege when include_browse is enabled in the request. 
- "catalog_name": - "description": |- - The name of the catalog where the schema and the registered model reside - "comment": - "description": |- - The comment attached to the registered model - "created_at": - "description": |- - Creation timestamp of the registered model in milliseconds since the Unix epoch - "created_by": - "description": |- - The identifier of the user who created the registered model - "full_name": - "description": |- - The three-level (fully qualified) name of the registered model - "metastore_id": - "description": |- - The unique identifier of the metastore - "name": - "description": |- - The name of the registered model - "owner": - "description": |- - The identifier of the user who owns the registered model - "schema_name": - "description": |- - The name of the schema where the registered model resides - "storage_location": - "description": |- - The storage location on the cloud under which model version data files are stored - "updated_at": - "description": |- - Last-update timestamp of the registered model in milliseconds since the Unix epoch - "updated_by": - "description": |- - The identifier of the user who updated the registered model last time -github.com/databricks/cli/bundle/config/resources.Schema: - "catalog_name": - "description": |- - Name of parent catalog. - "comment": - "description": |- - User-provided free-form text description. - "name": - "description": |- - Name of schema, relative to parent catalog. - "properties": - "description": |- - A map of key-value properties attached to the securable. - "storage_root": - "description": |- - Storage root URL for managed tables within schema. -github.com/databricks/cli/bundle/config/resources.SqlWarehouse: - "_": - "description": |- - Creates a new SQL warehouse. - "auto_stop_mins": - "description": |- - The amount of time in minutes that a SQL warehouse must be idle (i.e., no - RUNNING queries) before it is automatically stopped. 
- - Supported values: - - Must be == 0 or >= 10 mins - - 0 indicates no autostop. - - Defaults to 120 mins - "channel": - "description": |- - Channel Details - "cluster_size": - "description": |- - Size of the clusters allocated for this warehouse. - Increasing the size of a spark cluster allows you to run larger queries on - it. If you want to increase the number of concurrent queries, please tune - max_num_clusters. - - Supported values: - - 2X-Small - - X-Small - - Small - - Medium - - Large - - X-Large - - 2X-Large - - 3X-Large - - 4X-Large - - 5X-Large - "creator_name": - "description": |- - warehouse creator name - "enable_photon": - "description": |- - Configures whether the warehouse should use Photon optimized clusters. - - Defaults to true. - "enable_serverless_compute": - "description": |- - Configures whether the warehouse should use serverless compute - "instance_profile_arn": - "description": |- - Deprecated. Instance profile used to pass IAM role to the cluster - "deprecation_message": |- - This field is deprecated - "max_num_clusters": - "description": |- - Maximum number of clusters that the autoscaler will create to handle - concurrent queries. - - Supported values: - - Must be >= min_num_clusters - - Must be <= 40. - - Defaults to min_clusters if unset. - "min_num_clusters": - "description": |- - Minimum number of available clusters that will be maintained for this SQL - warehouse. Increasing this will ensure that a larger number of clusters are - always running and therefore may reduce the cold start time for new - queries. This is similar to reserved vs. revocable cores in a resource - manager. - - Supported values: - - Must be > 0 - - Must be <= min(max_num_clusters, 30) - - Defaults to 1 - "name": - "description": |- - Logical name for the cluster. - - Supported values: - - Must be unique within an org. - - Must be less than 100 characters. 
- "spot_instance_policy": - "description": |- - Configurations whether the endpoint should use spot instances. - "tags": - "description": |- - A set of key-value pairs that will be tagged on all resources (e.g., AWS instances and EBS volumes) associated - with this SQL warehouse. - - Supported values: - - Number of tags < 45. - "warehouse_type": - "description": |- - Warehouse type: `PRO` or `CLASSIC`. If you want to use serverless compute, - you must set to `PRO` and also set the field `enable_serverless_compute` to `true`. -github.com/databricks/cli/bundle/config/resources.SyncedDatabaseTable: - "_": - "description": |- - Next field marker: 18 - "data_synchronization_status": - "description": |- - Synced Table data synchronization status - "x-databricks-field-behaviors_output_only": |- - true - "database_instance_name": - "description": |- - Name of the target database instance. This is required when creating synced database tables in standard catalogs. - This is optional when creating synced database tables in registered catalogs. If this field is specified - when creating synced database tables in registered catalogs, the database instance name MUST - match that of the registered catalog (or the request will be rejected). - "effective_database_instance_name": - "description": |- - The name of the database instance that this table is registered to. This field is always returned, and for - tables inside database catalogs is inferred database instance associated with the catalog. - This is an output only field that contains the value computed from the input field combined with - server side defaults. Use the field without the effective_ prefix to set the value. - "x-databricks-field-behaviors_output_only": |- - true - "effective_logical_database_name": - "description": |- - The name of the logical database that this table is registered to. - This is an output only field that contains the value computed from the input field combined with - server side defaults. 
Use the field without the effective_ prefix to set the value. - "x-databricks-field-behaviors_output_only": |- - true - "logical_database_name": - "description": |- - Target Postgres database object (logical database) name for this table. - - When creating a synced table in a registered Postgres catalog, the - target Postgres database name is inferred to be that of the registered catalog. - If this field is specified in this scenario, the Postgres database name MUST - match that of the registered catalog (or the request will be rejected). - - When creating a synced table in a standard catalog, this field is required. - In this scenario, specifying this field will allow targeting an arbitrary postgres database. - Note that this has implications for the `create_database_objects_is_missing` field in `spec`. - "name": - "description": |- - Full three-part (catalog, schema, table) name of the table. - "spec": - "description": |- - Specification of a synced database table. - "unity_catalog_provisioning_state": - "description": |- - The provisioning state of the synced table entity in Unity Catalog. This is distinct from the - state of the data synchronization pipeline (i.e. the table may be in "ACTIVE" but the pipeline - may be in "PROVISIONING" as it runs asynchronously). - "x-databricks-field-behaviors_output_only": |- - true -github.com/databricks/cli/bundle/config/resources.Volume: - "catalog_name": - "description": |- - The name of the catalog where the schema and the volume are - "comment": - "description": |- - The comment attached to the volume - "name": - "description": |- - The name of the volume - "schema_name": - "description": |- - The name of the schema where the volume is - "storage_location": - "description": |- - The storage location on the cloud - "volume_type": - "description": |- - The type of the volume. An external volume is located in the specified external location. 
- A managed volume is located in the default location which is specified by the parent schema, or the parent catalog, or the Metastore. - [Learn more](https://docs.databricks.com/aws/en/volumes/managed-vs-external) -github.com/databricks/databricks-sdk-go/service/apps.AppDeployment: - "command": - "description": |- - The command with which to run the app. This will override the command specified in the app.yaml file. - "create_time": - "description": |- - The creation time of the deployment. Formatted timestamp in ISO 6801. - "x-databricks-field-behaviors_output_only": |- - true - "creator": - "description": |- - The email of the user creates the deployment. - "x-databricks-field-behaviors_output_only": |- - true - "deployment_artifacts": - "description": |- - The deployment artifacts for an app. - "x-databricks-field-behaviors_output_only": |- - true - "deployment_id": - "description": |- - The unique id of the deployment. - "env_vars": - "description": |- - The environment variables to set in the app runtime environment. This will override the environment variables specified in the app.yaml file. - "git_source": - "description": |- - Git repository to use as the source for the app deployment. - "x-databricks-preview": |- - PRIVATE - "mode": - "description": |- - The mode of which the deployment will manage the source code. - "source_code_path": - "description": |- - The workspace file system path of the source code used to create the app deployment. This is different from - `deployment_artifacts.source_code_path`, which is the path used by the deployed app. The former refers - to the original source code location of the app in the workspace during deployment creation, whereas - the latter provides a system generated stable snapshotted source code path used by the deployment. 
- "status": - "description": |- - Status and status message of the deployment - "x-databricks-field-behaviors_output_only": |- - true - "update_time": - "description": |- - The update time of the deployment. Formatted timestamp in ISO 6801. - "x-databricks-field-behaviors_output_only": |- - true -github.com/databricks/databricks-sdk-go/service/apps.AppDeploymentArtifacts: - "source_code_path": - "description": |- - The snapshotted workspace file system path of the source code loaded by the deployed app. -github.com/databricks/databricks-sdk-go/service/apps.AppDeploymentMode: - "_": - "enum": - - |- - SNAPSHOT - - |- - AUTO_SYNC -github.com/databricks/databricks-sdk-go/service/apps.AppDeploymentState: - "_": - "enum": - - |- - SUCCEEDED - - |- - FAILED - - |- - IN_PROGRESS - - |- - CANCELLED -github.com/databricks/databricks-sdk-go/service/apps.AppDeploymentStatus: - "message": - "description": |- - Message corresponding with the deployment state. - "x-databricks-field-behaviors_output_only": |- - true - "state": - "description": |- - State of the deployment. - "x-databricks-field-behaviors_output_only": |- - true -github.com/databricks/databricks-sdk-go/service/apps.AppResource: - "app": - "x-databricks-preview": |- - PRIVATE - "database": {} - "description": - "description": |- - Description of the App Resource. - "experiment": {} - "genie_space": {} - "job": {} - "name": - "description": |- - Name of the App Resource. 
- "secret": {} - "serving_endpoint": {} - "sql_warehouse": {} - "uc_securable": {} -github.com/databricks/databricks-sdk-go/service/apps.AppResourceApp: {} -github.com/databricks/databricks-sdk-go/service/apps.AppResourceDatabase: - "database_name": {} - "instance_name": {} - "permission": {} -github.com/databricks/databricks-sdk-go/service/apps.AppResourceDatabaseDatabasePermission: - "_": - "enum": - - |- - CAN_CONNECT_AND_CREATE -github.com/databricks/databricks-sdk-go/service/apps.AppResourceExperiment: - "experiment_id": {} - "permission": {} -github.com/databricks/databricks-sdk-go/service/apps.AppResourceExperimentExperimentPermission: - "_": - "enum": - - |- - CAN_MANAGE - - |- - CAN_EDIT - - |- - CAN_READ -github.com/databricks/databricks-sdk-go/service/apps.AppResourceGenieSpace: - "name": {} - "permission": {} - "space_id": {} -github.com/databricks/databricks-sdk-go/service/apps.AppResourceGenieSpaceGenieSpacePermission: - "_": - "enum": - - |- - CAN_MANAGE - - |- - CAN_EDIT - - |- - CAN_RUN - - |- - CAN_VIEW -github.com/databricks/databricks-sdk-go/service/apps.AppResourceJob: - "id": - "description": |- - Id of the job to grant permission on. - "permission": - "description": |- - Permissions to grant on the Job. Supported permissions are: "CAN_MANAGE", "IS_OWNER", "CAN_MANAGE_RUN", "CAN_VIEW". -github.com/databricks/databricks-sdk-go/service/apps.AppResourceJobJobPermission: - "_": - "enum": - - |- - CAN_MANAGE - - |- - IS_OWNER - - |- - CAN_MANAGE_RUN - - |- - CAN_VIEW -github.com/databricks/databricks-sdk-go/service/apps.AppResourceSecret: - "key": - "description": |- - Key of the secret to grant permission on. - "permission": - "description": |- - Permission to grant on the secret scope. For secrets, only one permission is allowed. Permission must be one of: "READ", "WRITE", "MANAGE". - "scope": - "description": |- - Scope of the secret to grant permission on. 
-github.com/databricks/databricks-sdk-go/service/apps.AppResourceSecretSecretPermission: - "_": - "description": |- - Permission to grant on the secret scope. Supported permissions are: "READ", "WRITE", "MANAGE". - "enum": - - |- - READ - - |- - WRITE - - |- - MANAGE -github.com/databricks/databricks-sdk-go/service/apps.AppResourceServingEndpoint: - "name": - "description": |- - Name of the serving endpoint to grant permission on. - "permission": - "description": |- - Permission to grant on the serving endpoint. Supported permissions are: "CAN_MANAGE", "CAN_QUERY", "CAN_VIEW". -github.com/databricks/databricks-sdk-go/service/apps.AppResourceServingEndpointServingEndpointPermission: - "_": - "enum": - - |- - CAN_MANAGE - - |- - CAN_QUERY - - |- - CAN_VIEW -github.com/databricks/databricks-sdk-go/service/apps.AppResourceSqlWarehouse: - "id": - "description": |- - Id of the SQL warehouse to grant permission on. - "permission": - "description": |- - Permission to grant on the SQL warehouse. Supported permissions are: "CAN_MANAGE", "CAN_USE", "IS_OWNER". -github.com/databricks/databricks-sdk-go/service/apps.AppResourceSqlWarehouseSqlWarehousePermission: - "_": - "enum": - - |- - CAN_MANAGE - - |- - CAN_USE - - |- - IS_OWNER -github.com/databricks/databricks-sdk-go/service/apps.AppResourceUcSecurable: - "permission": {} - "securable_full_name": {} - "securable_kind": - "description": |- - The securable kind from Unity Catalog. - See https://docs.databricks.com/api/workspace/tables/get#securable_kind_manifest-securable_kind. 
- "x-databricks-field-behaviors_output_only": |- - true - "x-databricks-preview": |- - PRIVATE - "securable_type": {} -github.com/databricks/databricks-sdk-go/service/apps.AppResourceUcSecurableUcSecurablePermission: - "_": - "enum": - - |- - READ_VOLUME - - |- - WRITE_VOLUME - - |- - SELECT - - |- - EXECUTE - - |- - USE_CONNECTION - - |- - MODIFY -github.com/databricks/databricks-sdk-go/service/apps.AppResourceUcSecurableUcSecurableType: - "_": - "enum": - - |- - VOLUME - - |- - TABLE - - |- - FUNCTION - - |- - CONNECTION -github.com/databricks/databricks-sdk-go/service/apps.ApplicationState: - "_": - "enum": - - |- - DEPLOYING - - |- - RUNNING - - |- - CRASHED - - |- - UNAVAILABLE -github.com/databricks/databricks-sdk-go/service/apps.ApplicationStatus: - "message": - "description": |- - Application status message - "x-databricks-field-behaviors_output_only": |- - true - "state": - "description": |- - State of the application. - "x-databricks-field-behaviors_output_only": |- - true -github.com/databricks/databricks-sdk-go/service/apps.ComputeSize: - "_": - "enum": - - |- - MEDIUM - - |- - LARGE -github.com/databricks/databricks-sdk-go/service/apps.ComputeState: - "_": - "enum": - - |- - ERROR - - |- - DELETING - - |- - STARTING - - |- - STOPPING - - |- - UPDATING - - |- - STOPPED - - |- - ACTIVE -github.com/databricks/databricks-sdk-go/service/apps.ComputeStatus: - "active_instances": - "description": |- - The number of compute instances currently serving requests for this - application. An instance is considered active if it is reachable and ready - to handle requests. - "x-databricks-field-behaviors_output_only": |- - true - "x-databricks-preview": |- - PRIVATE - "message": - "description": |- - Compute status message - "x-databricks-field-behaviors_output_only": |- - true - "state": - "description": |- - State of the app compute. 
- "x-databricks-field-behaviors_output_only": |- - true -github.com/databricks/databricks-sdk-go/service/apps.EnvVar: - "name": - "description": |- - The name of the environment variable. - "value": - "description": |- - The value for the environment variable. - "value_from": - "description": |- - The name of an external Databricks resource that contains the value, such as a secret or a database table. -github.com/databricks/databricks-sdk-go/service/apps.GitRepository: - "_": - "description": |- - Git repository configuration specifying the location of the repository. - "provider": - "description": |- - Git provider. Case insensitive. Supported values: gitHub, gitHubEnterprise, bitbucketCloud, - bitbucketServer, azureDevOpsServices, gitLab, gitLabEnterpriseEdition, awsCodeCommit. - "url": - "description": |- - URL of the Git repository. -github.com/databricks/databricks-sdk-go/service/apps.GitSource: - "_": - "description": |- - Complete git source specification including repository location and reference. - "branch": - "description": |- - Git branch to checkout. - "commit": - "description": |- - Git commit SHA to checkout. - "git_repository": - "description": |- - Git repository configuration. Populated from the app's git_repository configuration. - "x-databricks-field-behaviors_output_only": |- - true - "resolved_commit": - "description": |- - The resolved commit SHA that was actually used for the deployment. This is populated by the - system after resolving the reference (branch, tag, or commit). If commit is specified - directly, this will match commit. If a branch or tag is specified, this contains the - commit SHA that the branch or tag pointed to at deployment time. - "x-databricks-field-behaviors_output_only": |- - true - "x-databricks-preview": |- - PRIVATE - "source_code_path": - "description": |- - Relative path to the app source code within the Git repository. If not specified, the root - of the repository is used. 
- "tag": - "description": |- - Git tag to checkout. -github.com/databricks/databricks-sdk-go/service/catalog.AwsSqsQueue: - "managed_resource_id": - "description": |- - Unique identifier included in the name of file events managed cloud resources. - "x-databricks-field-behaviors_output_only": |- - true - "queue_url": - "description": |- - The AQS queue url in the format https://sqs.{region}.amazonaws.com/{account id}/{queue name}. - Only required for provided_sqs. -github.com/databricks/databricks-sdk-go/service/catalog.AzureQueueStorage: - "managed_resource_id": - "description": |- - Unique identifier included in the name of file events managed cloud resources. - "x-databricks-field-behaviors_output_only": |- - true - "queue_url": - "description": |- - The AQS queue url in the format https://{storage account}.queue.core.windows.net/{queue name} - Only required for provided_aqs. - "resource_group": - "description": |- - Optional resource group for the queue, event grid subscription, and external location storage - account. - Only required for locations with a service principal storage credential - "subscription_id": - "description": |- - Optional subscription id for the queue, event grid subscription, and external location storage - account. - Required for locations with a service principal storage credential -github.com/databricks/databricks-sdk-go/service/catalog.EncryptionDetails: - "_": - "description": |- - Encryption options that apply to clients connecting to cloud storage. - "sse_encryption_details": - "description": |- - Server-Side Encryption properties for clients communicating with AWS s3. 
-github.com/databricks/databricks-sdk-go/service/catalog.FileEventQueue: - "managed_aqs": {} - "managed_pubsub": {} - "managed_sqs": {} - "provided_aqs": {} - "provided_pubsub": {} - "provided_sqs": {} -github.com/databricks/databricks-sdk-go/service/catalog.GcpPubsub: - "managed_resource_id": - "description": |- - Unique identifier included in the name of file events managed cloud resources. - "x-databricks-field-behaviors_output_only": |- - true - "subscription_name": - "description": |- - The Pub/Sub subscription name in the format projects/{project}/subscriptions/{subscription name}. - Only required for provided_pubsub. -github.com/databricks/databricks-sdk-go/service/catalog.MonitorCronSchedule: - "pause_status": - "description": |- - Read only field that indicates whether a schedule is paused or not. - "quartz_cron_expression": - "description": |- - The expression that determines when to run the monitor. See [examples](https://www.quartz-scheduler.org/documentation/quartz-2.3.0/tutorials/crontrigger.html). - "timezone_id": - "description": |- - The timezone id (e.g., ``PST``) in which to evaluate the quartz expression. -github.com/databricks/databricks-sdk-go/service/catalog.MonitorCronSchedulePauseStatus: - "_": - "description": |- - Source link: https://src.dev.databricks.com/databricks/universe/-/blob/elastic-spark-common/api/messages/schedule.proto - Monitoring workflow schedule pause status. - "enum": - - |- - UNSPECIFIED - - |- - UNPAUSED - - |- - PAUSED -github.com/databricks/databricks-sdk-go/service/catalog.MonitorDataClassificationConfig: - "_": - "description": |- - Data classification related configuration. - "enabled": - "description": |- - Whether to enable data classification. -github.com/databricks/databricks-sdk-go/service/catalog.MonitorDestination: - "email_addresses": - "description": |- - The list of email addresses to send the notification to. A maximum of 5 email addresses is supported. 
-github.com/databricks/databricks-sdk-go/service/catalog.MonitorInferenceLog: - "granularities": - "description": |- - List of granularities to use when aggregating data into time windows based on their timestamp. - "label_col": - "description": |- - Column for the label. - "model_id_col": - "description": |- - Column for the model identifier. - "prediction_col": - "description": |- - Column for the prediction. - "prediction_proba_col": - "description": |- - Column for prediction probabilities - "problem_type": - "description": |- - Problem type the model aims to solve. - "timestamp_col": - "description": |- - Column for the timestamp. -github.com/databricks/databricks-sdk-go/service/catalog.MonitorInferenceLogProblemType: - "_": - "enum": - - |- - PROBLEM_TYPE_CLASSIFICATION - - |- - PROBLEM_TYPE_REGRESSION -github.com/databricks/databricks-sdk-go/service/catalog.MonitorMetric: - "_": - "description": |- - Custom metric definition. - "definition": - "description": |- - Jinja template for a SQL expression that specifies how to compute the metric. See [create metric definition](https://docs.databricks.com/en/lakehouse-monitoring/custom-metrics.html#create-definition). - "input_columns": - "description": |- - A list of column names in the input table the metric should be computed for. - Can use ``":table"`` to indicate that the metric needs information from multiple columns. - "name": - "description": |- - Name of the metric in the output tables. - "output_data_type": - "description": |- - The output type of the custom metric. - "type": - "description": |- - Can only be one of ``"CUSTOM_METRIC_TYPE_AGGREGATE"``, ``"CUSTOM_METRIC_TYPE_DERIVED"``, or ``"CUSTOM_METRIC_TYPE_DRIFT"``. - The ``"CUSTOM_METRIC_TYPE_AGGREGATE"`` and ``"CUSTOM_METRIC_TYPE_DERIVED"`` metrics - are computed on a single table, whereas the ``"CUSTOM_METRIC_TYPE_DRIFT"`` compare metrics across - baseline and input table, or across the two consecutive time windows. 
- - CUSTOM_METRIC_TYPE_AGGREGATE: only depend on the existing columns in your table - - CUSTOM_METRIC_TYPE_DERIVED: depend on previously computed aggregate metrics - - CUSTOM_METRIC_TYPE_DRIFT: depend on previously computed aggregate or derived metrics -github.com/databricks/databricks-sdk-go/service/catalog.MonitorMetricType: - "_": - "description": |- - Can only be one of ``\"CUSTOM_METRIC_TYPE_AGGREGATE\"``, ``\"CUSTOM_METRIC_TYPE_DERIVED\"``, or ``\"CUSTOM_METRIC_TYPE_DRIFT\"``. - The ``\"CUSTOM_METRIC_TYPE_AGGREGATE\"`` and ``\"CUSTOM_METRIC_TYPE_DERIVED\"`` metrics - are computed on a single table, whereas the ``\"CUSTOM_METRIC_TYPE_DRIFT\"`` compare metrics across - baseline and input table, or across the two consecutive time windows. - - CUSTOM_METRIC_TYPE_AGGREGATE: only depend on the existing columns in your table - - CUSTOM_METRIC_TYPE_DERIVED: depend on previously computed aggregate metrics - - CUSTOM_METRIC_TYPE_DRIFT: depend on previously computed aggregate or derived metrics - "enum": - - |- - CUSTOM_METRIC_TYPE_AGGREGATE - - |- - CUSTOM_METRIC_TYPE_DERIVED - - |- - CUSTOM_METRIC_TYPE_DRIFT -github.com/databricks/databricks-sdk-go/service/catalog.MonitorNotifications: - "on_failure": - "description": |- - Destinations to send notifications on failure/timeout. - "on_new_classification_tag_detected": - "description": |- - Destinations to send notifications on new classification tag detected. - "x-databricks-preview": |- - PRIVATE -github.com/databricks/databricks-sdk-go/service/catalog.MonitorSnapshot: - "_": - "description": |- - Snapshot analysis configuration -github.com/databricks/databricks-sdk-go/service/catalog.MonitorTimeSeries: - "_": - "description": |- - Time series analysis configuration. - "granularities": - "description": |- - Granularities for aggregating data into time windows based on their timestamp. 
Currently the following static - granularities are supported: - {``\"5 minutes\"``, ``\"30 minutes\"``, ``\"1 hour\"``, ``\"1 day\"``, ``\"\u003cn\u003e week(s)\"``, ``\"1 month\"``, ``\"1 year\"``}. - "timestamp_col": - "description": |- - Column for the timestamp. -github.com/databricks/databricks-sdk-go/service/catalog.Privilege: - "_": - "enum": - - |- - SELECT - - |- - READ_PRIVATE_FILES - - |- - WRITE_PRIVATE_FILES - - |- - CREATE - - |- - USAGE - - |- - USE_CATALOG - - |- - USE_SCHEMA - - |- - CREATE_SCHEMA - - |- - CREATE_VIEW - - |- - CREATE_EXTERNAL_TABLE - - |- - CREATE_MATERIALIZED_VIEW - - |- - CREATE_FUNCTION - - |- - CREATE_MODEL - - |- - CREATE_CATALOG - - |- - CREATE_MANAGED_STORAGE - - |- - CREATE_EXTERNAL_LOCATION - - |- - CREATE_STORAGE_CREDENTIAL - - |- - CREATE_SERVICE_CREDENTIAL - - |- - ACCESS - - |- - CREATE_SHARE - - |- - CREATE_RECIPIENT - - |- - CREATE_PROVIDER - - |- - USE_SHARE - - |- - USE_RECIPIENT - - |- - USE_PROVIDER - - |- - USE_MARKETPLACE_ASSETS - - |- - SET_SHARE_PERMISSION - - |- - MODIFY - - |- - REFRESH - - |- - EXECUTE - - |- - READ_FILES - - |- - WRITE_FILES - - |- - CREATE_TABLE - - |- - ALL_PRIVILEGES - - |- - CREATE_CONNECTION - - |- - USE_CONNECTION - - |- - APPLY_TAG - - |- - CREATE_FOREIGN_CATALOG - - |- - CREATE_FOREIGN_SECURABLE - - |- - MANAGE_ALLOWLIST - - |- - CREATE_VOLUME - - |- - CREATE_EXTERNAL_VOLUME - - |- - READ_VOLUME - - |- - WRITE_VOLUME - - |- - MANAGE - - |- - BROWSE - - |- - CREATE_CLEAN_ROOM - - |- - MODIFY_CLEAN_ROOM - - |- - EXECUTE_CLEAN_ROOM_TASK - - |- - EXTERNAL_USE_SCHEMA -github.com/databricks/databricks-sdk-go/service/catalog.PrivilegeAssignment: - "principal": - "description": |- - The principal (user email address or group name). - For deleted principals, `principal` is empty while `principal_id` is populated. - "privileges": - "description": |- - The privileges assigned to the principal. 
-github.com/databricks/databricks-sdk-go/service/catalog.RegisteredModelAlias: - "alias_name": - "description": |- - Name of the alias, e.g. 'champion' or 'latest_stable' - "catalog_name": - "description": |- - The name of the catalog containing the model version - "id": - "description": |- - The unique identifier of the alias - "model_name": - "description": |- - The name of the parent registered model of the model version, relative to parent schema - "schema_name": - "description": |- - The name of the schema containing the model version, relative to parent catalog - "version_num": - "description": |- - Integer version number of the model version to which this alias points. -github.com/databricks/databricks-sdk-go/service/catalog.SseEncryptionDetails: - "_": - "description": |- - Server-Side Encryption properties for clients communicating with AWS s3. - "algorithm": - "description": |- - Sets the value of the 'x-amz-server-side-encryption' header in S3 request. - "aws_kms_key_arn": - "description": |- - Optional. The ARN of the SSE-KMS key used with the S3 location, when algorithm = "SSE-KMS". - Sets the value of the 'x-amz-server-side-encryption-aws-kms-key-id' header. -github.com/databricks/databricks-sdk-go/service/catalog.SseEncryptionDetailsAlgorithm: - "_": - "enum": - - |- - AWS_SSE_S3 - - |- - AWS_SSE_KMS -github.com/databricks/databricks-sdk-go/service/catalog.VolumeType: - "_": - "enum": - - |- - MANAGED - - |- - EXTERNAL -github.com/databricks/databricks-sdk-go/service/compute.Adlsgen2Info: - "_": - "description": |- - A storage location in Adls Gen2 - "destination": - "description": |- - abfss destination, e.g. `abfss://@.dfs.core.windows.net/`. -github.com/databricks/databricks-sdk-go/service/compute.AutoScale: - "max_workers": - "description": |- - The maximum number of workers to which the cluster can scale up when overloaded. - Note that `max_workers` must be strictly greater than `min_workers`. 
- "min_workers": - "description": |- - The minimum number of workers to which the cluster can scale down when underutilized. - It is also the initial number of workers the cluster will have after creation. -github.com/databricks/databricks-sdk-go/service/compute.AwsAttributes: - "_": - "description": |- - Attributes set during cluster creation which are related to Amazon Web Services. - "availability": - "description": |- - Availability type used for all subsequent nodes past the `first_on_demand` ones. - - Note: If `first_on_demand` is zero, this availability type will be used for the entire cluster. - "ebs_volume_count": - "description": |- - The number of volumes launched for each instance. Users can choose up to 10 volumes. - This feature is only enabled for supported node types. Legacy node types cannot specify - custom EBS volumes. - For node types with no instance store, at least one EBS volume needs to be specified; - otherwise, cluster creation will fail. - - These EBS volumes will be mounted at `/ebs0`, `/ebs1`, and etc. - Instance store volumes will be mounted at `/local_disk0`, `/local_disk1`, and etc. - - If EBS volumes are attached, Databricks will configure Spark to use only the EBS volumes for - scratch storage because heterogenously sized scratch devices can lead to inefficient disk - utilization. If no EBS volumes are attached, Databricks will configure Spark to use instance - store volumes. - - Please note that if EBS volumes are specified, then the Spark configuration `spark.local.dir` - will be overridden. - "ebs_volume_iops": - "description": |- - If using gp3 volumes, what IOPS to use for the disk. If this is not set, the maximum performance of a gp2 volume with the same volume size will be used. - "ebs_volume_size": - "description": |- - The size of each EBS volume (in GiB) launched for each instance. For general purpose - SSD, this value must be within the range 100 - 4096. 
For throughput optimized HDD, - this value must be within the range 500 - 4096. - "ebs_volume_throughput": - "description": |- - If using gp3 volumes, what throughput to use for the disk. If this is not set, the maximum performance of a gp2 volume with the same volume size will be used. - "ebs_volume_type": - "description": |- - The type of EBS volumes that will be launched with this cluster. - "first_on_demand": - "description": |- - The first `first_on_demand` nodes of the cluster will be placed on on-demand instances. - If this value is greater than 0, the cluster driver node in particular will be placed on an - on-demand instance. If this value is greater than or equal to the current cluster size, all - nodes will be placed on on-demand instances. If this value is less than the current cluster - size, `first_on_demand` nodes will be placed on on-demand instances and the remainder will - be placed on `availability` instances. Note that this value does not affect - cluster size and cannot currently be mutated over the lifetime of a cluster. - "instance_profile_arn": - "description": |- - Nodes for this cluster will only be placed on AWS instances with this instance profile. If - ommitted, nodes will be placed on instances without an IAM instance profile. The instance - profile must have previously been added to the Databricks environment by an account - administrator. - - This feature may only be available to certain customer plans. - "spot_bid_price_percent": - "description": |- - The bid price for AWS spot instances, as a percentage of the corresponding instance type's - on-demand price. - For example, if this field is set to 50, and the cluster needs a new `r3.xlarge` spot - instance, then the bid price is half of the price of - on-demand `r3.xlarge` instances. Similarly, if this field is set to 200, the bid price is twice - the price of on-demand `r3.xlarge` instances. If not specified, the default value is 100. 
- When spot instances are requested for this cluster, only spot instances whose bid price - percentage matches this field will be considered. - Note that, for safety, we enforce this field to be no more than 10000. - "zone_id": - "description": |- - Identifier for the availability zone/datacenter in which the cluster resides. - This string will be of a form like "us-west-2a". The provided availability - zone must be in the same region as the Databricks deployment. For example, "us-west-2a" - is not a valid zone id if the Databricks deployment resides in the "us-east-1" region. - This is an optional field at cluster creation, and if not specified, the zone "auto" will be used. - If the zone specified is "auto", will try to place cluster in a zone with high availability, - and will retry placement in a different AZ if there is not enough capacity. - - The list of available zones as well as the default value can be found by using the - `List Zones` method. -github.com/databricks/databricks-sdk-go/service/compute.AwsAvailability: - "_": - "description": |- - Availability type used for all subsequent nodes past the `first_on_demand` ones. - - Note: If `first_on_demand` is zero, this availability type will be used for the entire cluster. - "enum": - - |- - SPOT - - |- - ON_DEMAND - - |- - SPOT_WITH_FALLBACK -github.com/databricks/databricks-sdk-go/service/compute.AzureAttributes: - "_": - "description": |- - Attributes set during cluster creation which are related to Microsoft Azure. - "availability": - "description": |- - Availability type used for all subsequent nodes past the `first_on_demand` ones. - Note: If `first_on_demand` is zero, this availability - type will be used for the entire cluster. - "first_on_demand": - "description": |- - The first `first_on_demand` nodes of the cluster will be placed on on-demand instances. - This value should be greater than 0, to make sure the cluster driver node is placed on an - on-demand instance. 
If this value is greater than or equal to the current cluster size, all - nodes will be placed on on-demand instances. If this value is less than the current cluster - size, `first_on_demand` nodes will be placed on on-demand instances and the remainder will - be placed on `availability` instances. Note that this value does not affect - cluster size and cannot currently be mutated over the lifetime of a cluster. - "log_analytics_info": - "description": |- - Defines values necessary to configure and run Azure Log Analytics agent - "spot_bid_max_price": - "description": |- - The max bid price to be used for Azure spot instances. - The Max price for the bid cannot be higher than the on-demand price of the instance. - If not specified, the default value is -1, which specifies that the instance cannot be evicted - on the basis of price, and only on the basis of availability. Further, the value should > 0 or -1. -github.com/databricks/databricks-sdk-go/service/compute.AzureAvailability: - "_": - "description": |- - Availability type used for all subsequent nodes past the `first_on_demand` ones. - Note: If `first_on_demand` is zero, this availability type will be used for the entire cluster. - "enum": - - |- - SPOT_AZURE - - |- - ON_DEMAND_AZURE - - |- - SPOT_WITH_FALLBACK_AZURE -github.com/databricks/databricks-sdk-go/service/compute.ClientsTypes: - "jobs": - "description": |- - With jobs set, the cluster can be used for jobs - "notebooks": - "description": |- - With notebooks set, this cluster can be used for notebooks -github.com/databricks/databricks-sdk-go/service/compute.ClusterLogConf: - "_": - "description": |- - Cluster log delivery config - "dbfs": - "description": |- - destination needs to be provided. e.g. - `{ "dbfs" : { "destination" : "dbfs:/home/cluster_log" } }` - "s3": - "description": |- - destination and either the region or endpoint need to be provided. e.g. 
- `{ "s3": { "destination" : "s3://cluster_log_bucket/prefix", "region" : "us-west-2" } }` - Cluster iam role is used to access s3, please make sure the cluster iam role in - `instance_profile_arn` has permission to write data to the s3 destination. - "volumes": - "description": |- - destination needs to be provided, e.g. - `{ "volumes": { "destination": "/Volumes/catalog/schema/volume/cluster_log" } }` -github.com/databricks/databricks-sdk-go/service/compute.ClusterSpec: - "_": - "description": |- - Contains a snapshot of the latest user specified settings that were used to create/edit the cluster. - "apply_policy_default_values": - "description": |- - When set to true, fixed and default values from the policy will be used for fields that are omitted. When set to false, only fixed values from the policy will be applied. - "autoscale": - "description": |- - Parameters needed in order to automatically scale clusters up and down based on load. - Note: autoscaling works best with DB runtime versions 3.0 or later. - "autotermination_minutes": - "description": |- - Automatically terminates the cluster after it is inactive for this time in minutes. If not set, - this cluster will not be automatically terminated. If specified, the threshold must be between - 10 and 10000 minutes. - Users can also set this value to 0 to explicitly disable automatic termination. - "aws_attributes": - "description": |- - Attributes related to clusters running on Amazon Web Services. - If not specified at cluster creation, a set of default values will be used. - "azure_attributes": - "description": |- - Attributes related to clusters running on Microsoft Azure. - If not specified at cluster creation, a set of default values will be used. - "cluster_log_conf": - "description": |- - The configuration for delivering spark logs to a long-term storage destination. - Three kinds of destinations (DBFS, S3 and Unity Catalog volumes) are supported. 
Only one destination can be specified - for one cluster. If the conf is given, the logs will be delivered to the destination every - `5 mins`. The destination of driver logs is `$destination/$clusterId/driver`, while - the destination of executor logs is `$destination/$clusterId/executor`. - "cluster_name": - "description": |- - Cluster name requested by the user. This doesn't have to be unique. - If not specified at creation, the cluster name will be an empty string. - For job clusters, the cluster name is automatically set based on the job and job run IDs. - "custom_tags": - "description": |- - Additional tags for cluster resources. Databricks will tag all cluster resources (e.g., AWS - instances and EBS volumes) with these tags in addition to `default_tags`. Notes: - - - Currently, Databricks allows at most 45 custom tags - - - Clusters can only reuse cloud resources if the resources' tags are a subset of the cluster tags - "data_security_mode": - "description": |- - Data security mode decides what data governance model to use when accessing data - from a cluster. - - The following modes can only be used when `kind = CLASSIC_PREVIEW`. - * `DATA_SECURITY_MODE_AUTO`: Databricks will choose the most appropriate access mode depending on your compute configuration. - * `DATA_SECURITY_MODE_STANDARD`: Alias for `USER_ISOLATION`. - * `DATA_SECURITY_MODE_DEDICATED`: Alias for `SINGLE_USER`. - - The following modes can be used regardless of `kind`. - * `NONE`: No security isolation for multiple users sharing the cluster. Data governance features are not available in this mode. - * `SINGLE_USER`: A secure cluster that can only be exclusively used by a single user specified in `single_user_name`. Most programming languages, cluster features and data governance features are available in this mode. - * `USER_ISOLATION`: A secure cluster that can be shared by multiple users. Cluster users are fully isolated so that they cannot see each other's data and credentials. 
Most data governance features are supported in this mode. But programming languages and cluster features might be limited. - - The following modes are deprecated starting with Databricks Runtime 15.0 and - will be removed for future Databricks Runtime versions: - - * `LEGACY_TABLE_ACL`: This mode is for users migrating from legacy Table ACL clusters. - * `LEGACY_PASSTHROUGH`: This mode is for users migrating from legacy Passthrough on high concurrency clusters. - * `LEGACY_SINGLE_USER`: This mode is for users migrating from legacy Passthrough on standard clusters. - * `LEGACY_SINGLE_USER_STANDARD`: This mode provides a way that doesn’t have UC nor passthrough enabled. - "docker_image": - "description": |- - Custom docker image BYOC - "driver_instance_pool_id": - "description": |- - The optional ID of the instance pool for the driver of the cluster belongs. - The pool cluster uses the instance pool with id (instance_pool_id) if the driver pool is not - assigned. - "driver_node_type_flexibility": - "description": |- - Flexible node type configuration for the driver node. - "driver_node_type_id": - "description": |- - The node type of the Spark driver. - Note that this field is optional; if unset, the driver node type will be set as the same value - as `node_type_id` defined above. - - This field, along with node_type_id, should not be set if virtual_cluster_size is set. - If both driver_node_type_id, node_type_id, and virtual_cluster_size are specified, driver_node_type_id and node_type_id take precedence. - "enable_elastic_disk": - "description": |- - Autoscaling Local Storage: when enabled, this cluster will dynamically acquire additional disk - space when its Spark workers are running low on disk space. - "enable_local_disk_encryption": - "description": |- - Whether to enable LUKS on cluster VMs' local disks - "gcp_attributes": - "description": |- - Attributes related to clusters running on Google Cloud Platform. 
- If not specified at cluster creation, a set of default values will be used. - "init_scripts": - "description": |- - The configuration for storing init scripts. Any number of destinations can be specified. - The scripts are executed sequentially in the order provided. - If `cluster_log_conf` is specified, init script logs are sent to `//init_scripts`. - "instance_pool_id": - "description": |- - The optional ID of the instance pool to which the cluster belongs. - "is_single_node": - "description": |- - This field can only be used when `kind = CLASSIC_PREVIEW`. - - When set to true, Databricks will automatically set single node related `custom_tags`, `spark_conf`, and `num_workers` - "kind": - "description": |- - The kind of compute described by this compute specification. - - Depending on `kind`, different validations and default values will be applied. - - Clusters with `kind = CLASSIC_PREVIEW` support the following fields, whereas clusters with no specified `kind` do not. - * [is_single_node](/api/workspace/clusters/create#is_single_node) - * [use_ml_runtime](/api/workspace/clusters/create#use_ml_runtime) - * [data_security_mode](/api/workspace/clusters/create#data_security_mode) set to `DATA_SECURITY_MODE_AUTO`, `DATA_SECURITY_MODE_DEDICATED`, or `DATA_SECURITY_MODE_STANDARD` - - By using the [simple form](https://docs.databricks.com/compute/simple-form.html), your clusters are automatically using `kind = CLASSIC_PREVIEW`. - "node_type_id": - "description": |- - This field encodes, through a single value, the resources available to each of - the Spark nodes in this cluster. For example, the Spark nodes can be provisioned - and optimized for memory or compute intensive workloads. A list of available node - types can be retrieved by using the :method:clusters/listNodeTypes API call. - "num_workers": - "description": |- - Number of worker nodes that this cluster should have. 
A cluster has one Spark Driver - and `num_workers` Executors for a total of `num_workers` + 1 Spark nodes. - - Note: When reading the properties of a cluster, this field reflects the desired number - of workers rather than the actual current number of workers. For instance, if a cluster - is resized from 5 to 10 workers, this field will immediately be updated to reflect - the target size of 10 workers, whereas the workers listed in `spark_info` will gradually - increase from 5 to 10 as the new nodes are provisioned. - "policy_id": - "description": |- - The ID of the cluster policy used to create the cluster if applicable. - "remote_disk_throughput": - "description": |- - If set, what the configurable throughput (in Mb/s) for the remote disk is. Currently only supported for GCP HYPERDISK_BALANCED disks. - "runtime_engine": - "description": |- - Determines the cluster's runtime engine, either standard or Photon. - - This field is not compatible with legacy `spark_version` values that contain `-photon-`. - Remove `-photon-` from the `spark_version` and set `runtime_engine` to `PHOTON`. - - If left unspecified, the runtime engine defaults to standard unless the spark_version - contains -photon-, in which case Photon will be used. - "single_user_name": - "description": |- - Single user name if data_security_mode is `SINGLE_USER` - "spark_conf": - "description": |- - An object containing a set of optional, user-specified Spark configuration key-value pairs. - Users can also pass in a string of extra JVM options to the driver and the executors via - `spark.driver.extraJavaOptions` and `spark.executor.extraJavaOptions` respectively. - "spark_env_vars": - "description": |- - An object containing a set of optional, user-specified environment variable key-value pairs. - Please note that key-value pair of the form (X,Y) will be exported as is (i.e., - `export X='Y'`) while launching the driver and workers. 
- - In order to specify an additional set of `SPARK_DAEMON_JAVA_OPTS`, we recommend appending - them to `$SPARK_DAEMON_JAVA_OPTS` as shown in the example below. This ensures that all - default databricks managed environmental variables are included as well. - - Example Spark environment variables: - `{"SPARK_WORKER_MEMORY": "28000m", "SPARK_LOCAL_DIRS": "/local_disk0"}` or - `{"SPARK_DAEMON_JAVA_OPTS": "$SPARK_DAEMON_JAVA_OPTS -Dspark.shuffle.service.enabled=true"}` - "spark_version": - "description": |- - The Spark version of the cluster, e.g. `3.3.x-scala2.11`. - A list of available Spark versions can be retrieved by using - the :method:clusters/sparkVersions API call. - "ssh_public_keys": - "description": |- - SSH public key contents that will be added to each Spark node in this cluster. The - corresponding private keys can be used to login with the user name `ubuntu` on port `2200`. - Up to 10 keys can be specified. - "total_initial_remote_disk_size": - "description": |- - If set, what the total initial volume size (in GB) of the remote disks should be. Currently only supported for GCP HYPERDISK_BALANCED disks. - "use_ml_runtime": - "description": |- - This field can only be used when `kind = CLASSIC_PREVIEW`. - - `effective_spark_version` is determined by `spark_version` (DBR release), this field `use_ml_runtime`, and whether `node_type_id` is gpu node or not. - "worker_node_type_flexibility": - "description": |- - Flexible node type configuration for worker nodes. - "workload_type": - "description": |- - Cluster Attributes showing for clusters workload types. -github.com/databricks/databricks-sdk-go/service/compute.DataSecurityMode: - "_": - "description": |- - Data security mode decides what data governance model to use when accessing data - from a cluster. - - The following modes can only be used when `kind = CLASSIC_PREVIEW`. - * `DATA_SECURITY_MODE_AUTO`: Databricks will choose the most appropriate access mode depending on your compute configuration. 
- * `DATA_SECURITY_MODE_STANDARD`: Alias for `USER_ISOLATION`. - * `DATA_SECURITY_MODE_DEDICATED`: Alias for `SINGLE_USER`. - - The following modes can be used regardless of `kind`. - * `NONE`: No security isolation for multiple users sharing the cluster. Data governance features are not available in this mode. - * `SINGLE_USER`: A secure cluster that can only be exclusively used by a single user specified in `single_user_name`. Most programming languages, cluster features and data governance features are available in this mode. - * `USER_ISOLATION`: A secure cluster that can be shared by multiple users. Cluster users are fully isolated so that they cannot see each other's data and credentials. Most data governance features are supported in this mode. But programming languages and cluster features might be limited. - - The following modes are deprecated starting with Databricks Runtime 15.0 and - will be removed for future Databricks Runtime versions: - - * `LEGACY_TABLE_ACL`: This mode is for users migrating from legacy Table ACL clusters. - * `LEGACY_PASSTHROUGH`: This mode is for users migrating from legacy Passthrough on high concurrency clusters. - * `LEGACY_SINGLE_USER`: This mode is for users migrating from legacy Passthrough on standard clusters. - * `LEGACY_SINGLE_USER_STANDARD`: This mode provides a way that doesn’t have UC nor passthrough enabled. - "enum": - - |- - NONE - - |- - SINGLE_USER - - |- - USER_ISOLATION - - |- - LEGACY_TABLE_ACL - - |- - LEGACY_PASSTHROUGH - - |- - LEGACY_SINGLE_USER - - |- - LEGACY_SINGLE_USER_STANDARD - - |- - DATA_SECURITY_MODE_STANDARD - - |- - DATA_SECURITY_MODE_DEDICATED - - |- - DATA_SECURITY_MODE_AUTO -github.com/databricks/databricks-sdk-go/service/compute.DbfsStorageInfo: - "_": - "description": |- - A storage location in DBFS - "destination": - "description": |- - dbfs destination, e.g. 
`dbfs:/my/path` -github.com/databricks/databricks-sdk-go/service/compute.DockerBasicAuth: - "password": - "description": |- - Password of the user - "username": - "description": |- - Name of the user -github.com/databricks/databricks-sdk-go/service/compute.DockerImage: - "basic_auth": - "description": |- - Basic auth with username and password - "url": - "description": |- - URL of the docker image. -github.com/databricks/databricks-sdk-go/service/compute.EbsVolumeType: - "_": - "description": |- - All EBS volume types that Databricks supports. - See https://aws.amazon.com/ebs/details/ for details. - "enum": - - |- - GENERAL_PURPOSE_SSD - - |- - THROUGHPUT_OPTIMIZED_HDD -github.com/databricks/databricks-sdk-go/service/compute.Environment: - "_": - "description": |- - The environment entity used to preserve serverless environment side panel, jobs' environment for non-notebook task, and DLT's environment for classic and serverless pipelines. - In this minimal environment spec, only pip dependencies are supported. - "base_environment": - "description": |- - The `base_environment` key refers to an `env.yaml` file that specifies an environment version and a collection of dependencies required for the environment setup. - This `env.yaml` file may itself include a `base_environment` reference pointing to another `env_1.yaml` file. However, when used as a base environment, `env_1.yaml` (or further nested references) will not be processed or included in the final environment, meaning that the resolution of `base_environment` references is not recursive. - "client": - "description": |- - Use `environment_version` instead. - "deprecation_message": |- - This field is deprecated - "dependencies": - "description": |- - List of pip dependencies, as supported by the version of pip in this environment. - Each dependency is a valid pip requirements file line per https://pip.pypa.io/en/stable/reference/requirements-file-format/. 
- Allowed dependencies include a requirement specifier, an archive URL, a local project path (such as WSFS or UC Volumes in Databricks), or a VCS project URL. - "environment_version": - "description": |- - Required. Environment version used by the environment. - Each version comes with a specific Python version and a set of Python packages. - The version is a string, consisting of an integer. - "java_dependencies": - "description": |- - List of java dependencies. Each dependency is a string representing a java library path. For example: `/Volumes/path/to/test.jar`. -github.com/databricks/databricks-sdk-go/service/compute.GcpAttributes: - "_": - "description": |- - Attributes set during cluster creation which are related to GCP. - "availability": - "description": |- - This field determines whether the spark executors will be scheduled to run on preemptible - VMs, on-demand VMs, or preemptible VMs with a fallback to on-demand VMs if the former is unavailable. - "boot_disk_size": - "description": |- - Boot disk size in GB - "first_on_demand": - "description": |- - The first `first_on_demand` nodes of the cluster will be placed on on-demand instances. - This value should be greater than 0, to make sure the cluster driver node is placed on an - on-demand instance. If this value is greater than or equal to the current cluster size, all - nodes will be placed on on-demand instances. If this value is less than the current cluster - size, `first_on_demand` nodes will be placed on on-demand instances and the remainder will - be placed on `availability` instances. Note that this value does not affect - cluster size and cannot currently be mutated over the lifetime of a cluster. - "google_service_account": - "description": |- - If provided, the cluster will impersonate the google service account when accessing - gcloud services (like GCS). The google service account - must have previously been added to the Databricks environment by an account - administrator. 
- "local_ssd_count": - "description": |- - If provided, each node (workers and driver) in the cluster will have this number of local SSDs attached. - Each local SSD is 375GB in size. - Refer to [GCP documentation](https://cloud.google.com/compute/docs/disks/local-ssd#choose_number_local_ssds) - for the supported number of local SSDs for each instance type. - "use_preemptible_executors": - "description": |- - This field determines whether the spark executors will be scheduled to run on preemptible - VMs (when set to true) versus standard compute engine VMs (when set to false; default). - Note: Soon to be deprecated, use the 'availability' field instead. - "deprecation_message": |- - This field is deprecated - "zone_id": - "description": |- - Identifier for the availability zone in which the cluster resides. - This can be one of the following: - - "HA" => High availability, spread nodes across availability zones for a Databricks deployment region [default]. - - "AUTO" => Databricks picks an availability zone to schedule the cluster on. - - A GCP availability zone => Pick One of the available zones for (machine type + region) from - https://cloud.google.com/compute/docs/regions-zones. -github.com/databricks/databricks-sdk-go/service/compute.GcpAvailability: - "_": - "description": |- - This field determines whether the instance pool will contain preemptible - VMs, on-demand VMs, or preemptible VMs with a fallback to on-demand VMs if the former is unavailable. - "enum": - - |- - PREEMPTIBLE_GCP - - |- - ON_DEMAND_GCP - - |- - PREEMPTIBLE_WITH_FALLBACK_GCP -github.com/databricks/databricks-sdk-go/service/compute.GcsStorageInfo: - "_": - "description": |- - A storage location in Google Cloud Platform's GCS - "destination": - "description": |- - GCS destination/URI, e.g. 
`gs://my-bucket/some-prefix` -github.com/databricks/databricks-sdk-go/service/compute.HardwareAcceleratorType: - "_": - "description": |- - HardwareAcceleratorType: The type of hardware accelerator to use for compute workloads. - NOTE: This enum is referenced and is intended to be used by other Databricks services - that need to specify hardware accelerator requirements for AI compute workloads. - "enum": - - |- - GPU_1xA10 - - |- - GPU_8xH100 -github.com/databricks/databricks-sdk-go/service/compute.InitScriptInfo: - "_": - "description": |- - Config for an individual init script - Next ID: 11 - "abfss": - "description": |- - destination needs to be provided, e.g. - `abfss://@.dfs.core.windows.net/` - "dbfs": - "description": |- - destination needs to be provided. e.g. - `{ "dbfs": { "destination" : "dbfs:/home/cluster_log" } }` - "deprecation_message": |- - This field is deprecated - "file": - "description": |- - destination needs to be provided, e.g. - `{ "file": { "destination": "file:/my/local/file.sh" } }` - "gcs": - "description": |- - destination needs to be provided, e.g. - `{ "gcs": { "destination": "gs://my-bucket/file.sh" } }` - "s3": - "description": |- - destination and either the region or endpoint need to be provided. e.g. - `{ \"s3\": { \"destination\": \"s3://cluster_log_bucket/prefix\", \"region\": \"us-west-2\" } }` - Cluster iam role is used to access s3, please make sure the cluster iam role in - `instance_profile_arn` has permission to write data to the s3 destination. - "volumes": - "description": |- - destination needs to be provided. e.g. - `{ \"volumes\" : { \"destination\" : \"/Volumes/my-init.sh\" } }` - "workspace": - "description": |- - destination needs to be provided, e.g. 
- `{ "workspace": { "destination": "/cluster-init-scripts/setup-datadog.sh" } }` -github.com/databricks/databricks-sdk-go/service/compute.Library: - "cran": - "description": |- - Specification of a CRAN library to be installed as part of the library - "egg": - "description": |- - Deprecated. URI of the egg library to install. Installing Python egg files is deprecated and is not supported in Databricks Runtime 14.0 and above. - "deprecation_message": |- - This field is deprecated - "jar": - "description": |- - URI of the JAR library to install. Supported URIs include Workspace paths, Unity Catalog Volumes paths, and S3 URIs. - For example: `{ "jar": "/Workspace/path/to/library.jar" }`, `{ "jar" : "/Volumes/path/to/library.jar" }` or - `{ "jar": "s3://my-bucket/library.jar" }`. - If S3 is used, please make sure the cluster has read access on the library. You may need to - launch the cluster with an IAM role to access the S3 URI. - "maven": - "description": |- - Specification of a maven library to be installed. For example: - `{ "coordinates": "org.jsoup:jsoup:1.7.2" }` - "pypi": - "description": |- - Specification of a PyPi library to be installed. For example: - `{ "package": "simplejson" }` - "requirements": - "description": |- - URI of the requirements.txt file to install. Only Workspace paths and Unity Catalog Volumes paths are supported. - For example: `{ "requirements": "/Workspace/path/to/requirements.txt" }` or `{ "requirements" : "/Volumes/path/to/requirements.txt" }` - "whl": - "description": |- - URI of the wheel library to install. Supported URIs include Workspace paths, Unity Catalog Volumes paths, and S3 URIs. - For example: `{ "whl": "/Workspace/path/to/library.whl" }`, `{ "whl" : "/Volumes/path/to/library.whl" }` or - `{ "whl": "s3://my-bucket/library.whl" }`. - If S3 is used, please make sure the cluster has read access on the library. You may need to - launch the cluster with an IAM role to access the S3 URI. 
-github.com/databricks/databricks-sdk-go/service/compute.LocalFileInfo: - "destination": - "description": |- - local file destination, e.g. `file:/my/local/file.sh` -github.com/databricks/databricks-sdk-go/service/compute.LogAnalyticsInfo: - "log_analytics_primary_key": {} - "log_analytics_workspace_id": {} -github.com/databricks/databricks-sdk-go/service/compute.MavenLibrary: - "coordinates": - "description": |- - Gradle-style maven coordinates. For example: "org.jsoup:jsoup:1.7.2". - "exclusions": - "description": |- - List of dependences to exclude. For example: `["slf4j:slf4j", "*:hadoop-client"]`. - - Maven dependency exclusions: - https://maven.apache.org/guides/introduction/introduction-to-optional-and-excludes-dependencies.html. - "repo": - "description": |- - Maven repo to install the Maven package from. If omitted, both Maven Central Repository - and Spark Packages are searched. -github.com/databricks/databricks-sdk-go/service/compute.NodeTypeFlexibility: - "_": - "description": |- - Configuration for flexible node types, allowing fallback to alternate node types during cluster launch and upscale. - "alternate_node_type_ids": - "description": |- - A list of node type IDs to use as fallbacks when the primary node type is unavailable. -github.com/databricks/databricks-sdk-go/service/compute.PythonPyPiLibrary: - "package": - "description": |- - The name of the pypi package to install. An optional exact version specification is also - supported. Examples: "simplejson" and "simplejson==3.8.0". - "repo": - "description": |- - The repository where the package can be found. If not specified, the default pip index is - used. -github.com/databricks/databricks-sdk-go/service/compute.RCranLibrary: - "package": - "description": |- - The name of the CRAN package to install. - "repo": - "description": |- - The repository where the package can be found. If not specified, the default CRAN repo is used. 
-github.com/databricks/databricks-sdk-go/service/compute.RuntimeEngine: - "_": - "enum": - - |- - NULL - - |- - STANDARD - - |- - PHOTON -github.com/databricks/databricks-sdk-go/service/compute.S3StorageInfo: - "_": - "description": |- - A storage location in Amazon S3 - "canned_acl": - "description": |- - (Optional) Set canned access control list for the logs, e.g. `bucket-owner-full-control`. - If `canned_cal` is set, please make sure the cluster iam role has `s3:PutObjectAcl` permission on - the destination bucket and prefix. The full list of possible canned acl can be found at - http://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl. - Please also note that by default only the object owner gets full controls. If you are using cross account - role for writing data, you may want to set `bucket-owner-full-control` to make bucket owner able to - read the logs. - "destination": - "description": |- - S3 destination, e.g. `s3://my-bucket/some-prefix` Note that logs will be delivered using - cluster iam role, please make sure you set cluster iam role and the role has write access to the - destination. Please also note that you cannot use AWS keys to deliver logs. - "enable_encryption": - "description": |- - (Optional) Flag to enable server side encryption, `false` by default. - "encryption_type": - "description": |- - (Optional) The encryption type, it could be `sse-s3` or `sse-kms`. It will be used only when - encryption is enabled and the default type is `sse-s3`. - "endpoint": - "description": |- - S3 endpoint, e.g. `https://s3-us-west-2.amazonaws.com`. Either region or endpoint needs to be set. - If both are set, endpoint will be used. - "kms_key": - "description": |- - (Optional) Kms key which will be used if encryption is enabled and encryption type is set to `sse-kms`. - "region": - "description": |- - S3 region, e.g. `us-west-2`. Either region or endpoint needs to be set. If both are set, - endpoint will be used. 
-github.com/databricks/databricks-sdk-go/service/compute.VolumesStorageInfo: - "_": - "description": |- - A storage location back by UC Volumes. - "destination": - "description": |- - UC Volumes destination, e.g. `/Volumes/catalog/schema/vol1/init-scripts/setup-datadog.sh` - or `dbfs:/Volumes/catalog/schema/vol1/init-scripts/setup-datadog.sh` -github.com/databricks/databricks-sdk-go/service/compute.WorkloadType: - "_": - "description": |- - Cluster Attributes showing for clusters workload types. - "clients": - "description": |- - defined what type of clients can use the cluster. E.g. Notebooks, Jobs -github.com/databricks/databricks-sdk-go/service/compute.WorkspaceStorageInfo: - "_": - "description": |- - A storage location in Workspace Filesystem (WSFS) - "destination": - "description": |- - wsfs destination, e.g. `workspace:/cluster-init-scripts/setup-datadog.sh` -github.com/databricks/databricks-sdk-go/service/dashboards.LifecycleState: - "_": - "enum": - - |- - ACTIVE - - |- - TRASHED -github.com/databricks/databricks-sdk-go/service/database.CustomTag: - "key": - "description": |- - The key of the custom tag. - "value": - "description": |- - The value of the custom tag. -github.com/databricks/databricks-sdk-go/service/database.DatabaseInstanceRef: - "_": - "description": |- - DatabaseInstanceRef is a reference to a database instance. It is used in the - DatabaseInstance object to refer to the parent instance of an instance and - to refer the child instances of an instance. - To specify as a parent instance during creation of an instance, - the lsn and branch_time fields are optional. If not specified, the child - instance will be created from the latest lsn of the parent. - If both lsn and branch_time are specified, the lsn will be used to create - the child instance. - "branch_time": - "description": |- - Branch time of the ref database instance. 
- For a parent ref instance, this is the point in time on the parent instance from which the - instance was created. - For a child ref instance, this is the point in time on the instance from which the child - instance was created. - Input: For specifying the point in time to create a child instance. Optional. - Output: Only populated if provided as input to create a child instance. - "effective_lsn": - "description": |- - For a parent ref instance, this is the LSN on the parent instance from which the - instance was created. - For a child ref instance, this is the LSN on the instance from which the child instance - was created. - This is an output only field that contains the value computed from the input field combined with - server side defaults. Use the field without the effective_ prefix to set the value. - "x-databricks-field-behaviors_output_only": |- - true - "lsn": - "description": |- - User-specified WAL LSN of the ref database instance. - - Input: For specifying the WAL LSN to create a child instance. Optional. - Output: Only populated if provided as input to create a child instance. - "name": - "description": |- - Name of the ref database instance. - "uid": - "description": |- - Id of the ref database instance. - "x-databricks-field-behaviors_output_only": |- - true -github.com/databricks/databricks-sdk-go/service/database.DatabaseInstanceState: - "_": - "enum": - - |- - STARTING - - |- - AVAILABLE - - |- - DELETING - - |- - STOPPED - - |- - UPDATING - - |- - FAILING_OVER -github.com/databricks/databricks-sdk-go/service/database.DeltaTableSyncInfo: - "delta_commit_timestamp": - "description": |- - The timestamp when the above Delta version was committed in the source Delta table. - Note: This is the Delta commit time, not the time the data was written to the synced table. - "x-databricks-field-behaviors_output_only": |- - true - "delta_commit_version": - "description": |- - The Delta Lake commit version that was last successfully synced. 
- "x-databricks-field-behaviors_output_only": |- - true -github.com/databricks/databricks-sdk-go/service/database.NewPipelineSpec: - "_": - "description": |- - Custom fields that user can set for pipeline while creating SyncedDatabaseTable. - Note that other fields of pipeline are still inferred by table def internally - "budget_policy_id": - "description": |- - Budget policy to set on the newly created pipeline. - "storage_catalog": - "description": |- - This field needs to be specified if the destination catalog is a managed postgres catalog. - - UC catalog for the pipeline to store intermediate files (checkpoints, event logs etc). - This needs to be a standard catalog where the user has permissions to create Delta tables. - "storage_schema": - "description": |- - This field needs to be specified if the destination catalog is a managed postgres catalog. - - UC schema for the pipeline to store intermediate files (checkpoints, event logs etc). - This needs to be in the standard catalog where the user has permissions to create Delta tables. -github.com/databricks/databricks-sdk-go/service/database.ProvisioningInfoState: - "_": - "enum": - - |- - PROVISIONING - - |- - ACTIVE - - |- - FAILED - - |- - DELETING - - |- - UPDATING - - |- - DEGRADED -github.com/databricks/databricks-sdk-go/service/database.ProvisioningPhase: - "_": - "enum": - - |- - PROVISIONING_PHASE_MAIN - - |- - PROVISIONING_PHASE_INDEX_SCAN - - |- - PROVISIONING_PHASE_INDEX_SORT -github.com/databricks/databricks-sdk-go/service/database.SyncedTableContinuousUpdateStatus: - "_": - "description": |- - Detailed status of a synced table. Shown if the synced table is in the SYNCED_CONTINUOUS_UPDATE - or the SYNCED_UPDATING_PIPELINE_RESOURCES state. - "initial_pipeline_sync_progress": - "description": |- - Progress of the initial data synchronization. 
- "x-databricks-field-behaviors_output_only": |- - true - "last_processed_commit_version": - "description": |- - The last source table Delta version that was successfully synced to the synced table. - "x-databricks-field-behaviors_output_only": |- - true - "timestamp": - "description": |- - The end timestamp of the last time any data was synchronized from the source table to the synced - table. This is when the data is available in the synced table. - "x-databricks-field-behaviors_output_only": |- - true -github.com/databricks/databricks-sdk-go/service/database.SyncedTableFailedStatus: - "_": - "description": |- - Detailed status of a synced table. Shown if the synced table is in the OFFLINE_FAILED or the - SYNCED_PIPELINE_FAILED state. - "last_processed_commit_version": - "description": |- - The last source table Delta version that was successfully synced to the synced table. - The last source table Delta version that was synced to the synced table. - Only populated if the table is still - synced and available for serving. - "x-databricks-field-behaviors_output_only": |- - true - "timestamp": - "description": |- - The end timestamp of the last time any data was synchronized from the source table to the synced - table. Only populated if the table is still synced and available for serving. - "x-databricks-field-behaviors_output_only": |- - true -github.com/databricks/databricks-sdk-go/service/database.SyncedTablePipelineProgress: - "_": - "description": |- - Progress information of the Synced Table data synchronization pipeline. - "estimated_completion_time_seconds": - "description": |- - The estimated time remaining to complete this update in seconds. - "x-databricks-field-behaviors_output_only": |- - true - "latest_version_currently_processing": - "description": |- - The source table Delta version that was last processed by the pipeline. The pipeline may not - have completely processed this version yet. 
- "x-databricks-field-behaviors_output_only": |- - true - "provisioning_phase": - "description": |- - The current phase of the data synchronization pipeline. - "x-databricks-field-behaviors_output_only": |- - true - "sync_progress_completion": - "description": |- - The completion ratio of this update. This is a number between 0 and 1. - "x-databricks-field-behaviors_output_only": |- - true - "synced_row_count": - "description": |- - The number of rows that have been synced in this update. - "x-databricks-field-behaviors_output_only": |- - true - "total_row_count": - "description": |- - The total number of rows that need to be synced in this update. This number may be an estimate. - "x-databricks-field-behaviors_output_only": |- - true -github.com/databricks/databricks-sdk-go/service/database.SyncedTablePosition: - "delta_table_sync_info": - "x-databricks-field-behaviors_output_only": |- - true - "sync_end_timestamp": - "description": |- - The end timestamp of the most recent successful synchronization. - This is the time when the data is available in the synced table. - "x-databricks-field-behaviors_output_only": |- - true - "sync_start_timestamp": - "description": |- - The starting timestamp of the most recent successful synchronization from the source table - to the destination (synced) table. - Note this is the starting timestamp of the sync operation, not the end time. - E.g., for a batch, this is the time when the sync operation started. - "x-databricks-field-behaviors_output_only": |- - true -github.com/databricks/databricks-sdk-go/service/database.SyncedTableProvisioningStatus: - "_": - "description": |- - Detailed status of a synced table. Shown if the synced table is in the - PROVISIONING_PIPELINE_RESOURCES or the PROVISIONING_INITIAL_SNAPSHOT state. - "initial_pipeline_sync_progress": - "description": |- - Details about initial data synchronization. Only populated when in the - PROVISIONING_INITIAL_SNAPSHOT state. 
- "x-databricks-field-behaviors_output_only": |- - true -github.com/databricks/databricks-sdk-go/service/database.SyncedTableSchedulingPolicy: - "_": - "enum": - - |- - CONTINUOUS - - |- - TRIGGERED - - |- - SNAPSHOT -github.com/databricks/databricks-sdk-go/service/database.SyncedTableSpec: - "_": - "description": |- - Specification of a synced database table. - "create_database_objects_if_missing": - "description": |- - If true, the synced table's logical database and schema resources in PG - will be created if they do not already exist. - "existing_pipeline_id": - "description": |- - At most one of existing_pipeline_id and new_pipeline_spec should be defined. - - If existing_pipeline_id is defined, the synced table will be bin packed into the existing pipeline - referenced. This avoids creating a new pipeline and allows sharing existing compute. - In this case, the scheduling_policy of this synced table must match the scheduling policy of the existing pipeline. - "new_pipeline_spec": - "description": |- - At most one of existing_pipeline_id and new_pipeline_spec should be defined. - - If new_pipeline_spec is defined, a new pipeline is created for this synced table. The location pointed to is used - to store intermediate files (checkpoints, event logs etc). The caller must have write permissions to create Delta - tables in the specified catalog and schema. Again, note this requires write permissions, whereas the source table - only requires read permissions. - "primary_key_columns": - "description": |- - Primary Key columns to be used for data insert/update in the destination. - "scheduling_policy": - "description": |- - Scheduling policy of the underlying pipeline. - "source_table_full_name": - "description": |- - Three-part (catalog, schema, table) name of the source Delta table. - "timeseries_key": - "description": |- - Time series key to deduplicate (tie-break) rows with the same primary key. 
-github.com/databricks/databricks-sdk-go/service/database.SyncedTableState: - "_": - "description": |- - The state of a synced table. - "enum": - - |- - SYNCED_TABLE_PROVISIONING - - |- - SYNCED_TABLE_PROVISIONING_PIPELINE_RESOURCES - - |- - SYNCED_TABLE_PROVISIONING_INITIAL_SNAPSHOT - - |- - SYNCED_TABLE_ONLINE - - |- - SYNCED_TABLE_ONLINE_CONTINUOUS_UPDATE - - |- - SYNCED_TABLE_ONLINE_TRIGGERED_UPDATE - - |- - SYNCED_TABLE_ONLINE_NO_PENDING_UPDATE - - |- - SYNCED_TABLED_OFFLINE - - |- - SYNCED_TABLE_OFFLINE_FAILED - - |- - SYNCED_TABLE_ONLINE_PIPELINE_FAILED - - |- - SYNCED_TABLE_ONLINE_UPDATING_PIPELINE_RESOURCES -github.com/databricks/databricks-sdk-go/service/database.SyncedTableStatus: - "_": - "description": |- - Status of a synced table. - "continuous_update_status": - "description": |- - Detailed status of a synced table. Shown if the synced table is in the SYNCED_CONTINUOUS_UPDATE - or the SYNCED_UPDATING_PIPELINE_RESOURCES state. - "detailed_state": - "description": |- - The state of the synced table. - "x-databricks-field-behaviors_output_only": |- - true - "failed_status": - "description": |- - Detailed status of a synced table. Shown if the synced table is in the OFFLINE_FAILED or the - SYNCED_PIPELINE_FAILED state. - "last_sync": - "description": |- - Summary of the last successful synchronization from source to destination. - - Will always be present if there has been a successful sync. Even if the most recent syncs have failed. - - Limitation: - The only exception is if the synced table is doing a FULL REFRESH, then the last sync information - will not be available until the full refresh is complete. This limitation will be addressed in a future version. - - This top-level field is a convenience for consumers who want easy access to last sync information - without having to traverse detailed_status. - "x-databricks-field-behaviors_output_only": |- - true - "message": - "description": |- - A text description of the current state of the synced table. 
- "x-databricks-field-behaviors_output_only": |- - true - "pipeline_id": - "description": |- - ID of the associated pipeline. The pipeline ID may have been provided by the client - (in the case of bin packing), or generated by the server (when creating a new pipeline). - "x-databricks-field-behaviors_output_only": |- - true - "provisioning_status": - "description": |- - Detailed status of a synced table. Shown if the synced table is in the - PROVISIONING_PIPELINE_RESOURCES or the PROVISIONING_INITIAL_SNAPSHOT state. - "triggered_update_status": - "description": |- - Detailed status of a synced table. Shown if the synced table is in the SYNCED_TRIGGERED_UPDATE - or the SYNCED_NO_PENDING_UPDATE state. -github.com/databricks/databricks-sdk-go/service/database.SyncedTableTriggeredUpdateStatus: - "_": - "description": |- - Detailed status of a synced table. Shown if the synced table is in the SYNCED_TRIGGERED_UPDATE - or the SYNCED_NO_PENDING_UPDATE state. - "last_processed_commit_version": - "description": |- - The last source table Delta version that was successfully synced to the synced table. - "x-databricks-field-behaviors_output_only": |- - true - "timestamp": - "description": |- - The end timestamp of the last time any data was synchronized from the source table to the synced - table. This is when the data is available in the synced table. - "x-databricks-field-behaviors_output_only": |- - true - "triggered_update_progress": - "description": |- - Progress of the active data synchronization pipeline. - "x-databricks-field-behaviors_output_only": |- - true -github.com/databricks/databricks-sdk-go/service/jobs.AuthenticationMethod: - "_": - "enum": - - |- - OAUTH - - |- - PAT -github.com/databricks/databricks-sdk-go/service/jobs.CleanRoomsNotebookTask: - "_": - "description": |- - Clean Rooms notebook task for V1 Clean Room service (GA). - Replaces the deprecated CleanRoomNotebookTask (defined above) which was for V0 service. 
- "clean_room_name": - "description": |- - The clean room that the notebook belongs to. - "etag": - "description": |- - Checksum to validate the freshness of the notebook resource (i.e. the notebook being run is the latest version). - It can be fetched by calling the :method:cleanroomassets/get API. - "notebook_base_parameters": - "description": |- - Base parameters to be used for the clean room notebook job. - "notebook_name": - "description": |- - Name of the notebook being run. -github.com/databricks/databricks-sdk-go/service/jobs.Compute: - "hardware_accelerator": - "description": |- - Hardware accelerator configuration for Serverless GPU workloads. -github.com/databricks/databricks-sdk-go/service/jobs.ComputeConfig: - "gpu_node_pool_id": - "description": |- - IDof the GPU pool to use. - "gpu_type": - "description": |- - GPU type. - "num_gpus": - "description": |- - Number of GPUs. -github.com/databricks/databricks-sdk-go/service/jobs.Condition: - "_": - "enum": - - |- - ANY_UPDATED - - |- - ALL_UPDATED -github.com/databricks/databricks-sdk-go/service/jobs.ConditionTask: - "left": - "description": |- - The left operand of the condition task. Can be either a string value or a job state or parameter reference. - "op": - "description": |- - * `EQUAL_TO`, `NOT_EQUAL` operators perform string comparison of their operands. This means that `“12.0” == “12”` will evaluate to `false`. - * `GREATER_THAN`, `GREATER_THAN_OR_EQUAL`, `LESS_THAN`, `LESS_THAN_OR_EQUAL` operators perform numeric comparison of their operands. `“12.0” >= “12”` will evaluate to `true`, `“10.0” >= “12”` will evaluate to `false`. - - The boolean comparison to task values can be implemented with operators `EQUAL_TO`, `NOT_EQUAL`. If a task value was set to a boolean value, it will be serialized to `“true”` or `“false”` for the comparison. - "right": - "description": |- - The right operand of the condition task. Can be either a string value or a job state or parameter reference. 
-github.com/databricks/databricks-sdk-go/service/jobs.ConditionTaskOp: - "_": - "description": |- - * `EQUAL_TO`, `NOT_EQUAL` operators perform string comparison of their operands. This means that `“12.0” == “12”` will evaluate to `false`. - * `GREATER_THAN`, `GREATER_THAN_OR_EQUAL`, `LESS_THAN`, `LESS_THAN_OR_EQUAL` operators perform numeric comparison of their operands. `“12.0” >= “12”` will evaluate to `true`, `“10.0” >= “12”` will evaluate to `false`. - - The boolean comparison to task values can be implemented with operators `EQUAL_TO`, `NOT_EQUAL`. If a task value was set to a boolean value, it will be serialized to `“true”` or `“false”` for the comparison. - "enum": - - |- - EQUAL_TO - - |- - GREATER_THAN - - |- - GREATER_THAN_OR_EQUAL - - |- - LESS_THAN - - |- - LESS_THAN_OR_EQUAL - - |- - NOT_EQUAL -github.com/databricks/databricks-sdk-go/service/jobs.Continuous: - "pause_status": - "description": |- - Indicate whether the continuous execution of the job is paused or not. Defaults to UNPAUSED. - "task_retry_mode": - "description": |- - Indicate whether the continuous job is applying task level retries or not. Defaults to NEVER. -github.com/databricks/databricks-sdk-go/service/jobs.CronSchedule: - "pause_status": - "description": |- - Indicate whether this schedule is paused or not. - "quartz_cron_expression": - "description": |- - A Cron expression using Quartz syntax that describes the schedule for a job. See [Cron Trigger](http://www.quartz-scheduler.org/documentation/quartz-2.3.0/tutorials/crontrigger.html) for details. This field is required. - "timezone_id": - "description": |- - A Java timezone ID. The schedule for a job is resolved with respect to this timezone. See [Java TimeZone](https://docs.oracle.com/javase/7/docs/api/java/util/TimeZone.html) for details. This field is required. -github.com/databricks/databricks-sdk-go/service/jobs.DashboardTask: - "_": - "description": |- - Configures the Lakeview Dashboard job task type. 
- "dashboard_id": - "description": |- - The identifier of the dashboard to refresh. - "filters": - "description": |- - Dashboard task parameters. Used to apply dashboard filter values during dashboard task execution. Parameter values get applied to any dashboard filters that have a matching URL identifier as the parameter key. - The parameter value format is dependent on the filter type: - - For text and single-select filters, provide a single value (e.g. `"value"`) - - For date and datetime filters, provide the value in ISO 8601 format (e.g. `"2000-01-01T00:00:00"`) - - For multi-select filters, provide a JSON array of values (e.g. `"[\"value1\",\"value2\"]"`) - - For range and date range filters, provide a JSON object with `start` and `end` (e.g. `"{\"start\":\"1\",\"end\":\"10\"}"`) - "x-databricks-preview": |- - PRIVATE - "subscription": - "description": |- - Optional: subscription configuration for sending the dashboard snapshot. - "warehouse_id": - "description": |- - Optional: The warehouse id to execute the dashboard with for the schedule. - If not specified, the default warehouse of the dashboard will be used. -github.com/databricks/databricks-sdk-go/service/jobs.DbtCloudTask: - "_": - "description": |- - Deprecated in favor of DbtPlatformTask - "connection_resource_name": - "description": |- - The resource name of the UC connection that authenticates the dbt Cloud for this task - "dbt_cloud_job_id": - "description": |- - Id of the dbt Cloud job to be triggered -github.com/databricks/databricks-sdk-go/service/jobs.DbtPlatformTask: - "connection_resource_name": - "description": |- - The resource name of the UC connection that authenticates the dbt platform for this task - "dbt_platform_job_id": - "description": |- - Id of the dbt platform job to be triggered. Specified as a string for maximum compatibility with clients. -github.com/databricks/databricks-sdk-go/service/jobs.DbtTask: - "catalog": - "description": |- - Optional name of the catalog to use. 
The value is the top level in the 3-level namespace of Unity Catalog (catalog / schema / relation). The catalog value can only be specified if a warehouse_id is specified. Requires dbt-databricks >= 1.1.1. - "commands": - "description": |- - A list of dbt commands to execute. All commands must start with `dbt`. This parameter must not be empty. A maximum of up to 10 commands can be provided. - "profiles_directory": - "description": |- - Optional (relative) path to the profiles directory. Can only be specified if no warehouse_id is specified. If no warehouse_id is specified and this folder is unset, the root directory is used. - "project_directory": - "description": |- - Path to the project directory. Optional for Git sourced tasks, in which - case if no value is provided, the root of the Git repository is used. - "schema": - "description": |- - Optional schema to write to. This parameter is only used when a warehouse_id is also provided. If not provided, the `default` schema is used. - "source": - "description": |- - Optional location type of the project directory. When set to `WORKSPACE`, the project will be retrieved - from the local Databricks workspace. When set to `GIT`, the project will be retrieved from a Git repository - defined in `git_source`. If the value is empty, the task will use `GIT` if `git_source` is defined and `WORKSPACE` otherwise. - - * `WORKSPACE`: Project is located in Databricks workspace. - * `GIT`: Project is located in cloud Git provider. - "warehouse_id": - "description": |- - ID of the SQL warehouse to connect to. If provided, we automatically generate and provide the profile and connection details to dbt. It can be overridden on a per-command basis by using the `--profiles-dir` command line argument. 
-github.com/databricks/databricks-sdk-go/service/jobs.FileArrivalTriggerConfiguration: - "min_time_between_triggers_seconds": - "description": |- - If set, the trigger starts a run only after the specified amount of time passed since - the last time the trigger fired. The minimum allowed value is 60 seconds - "url": - "description": |- - URL to be monitored for file arrivals. The path must point to the root or a subpath of the external location. - "wait_after_last_change_seconds": - "description": |- - If set, the trigger starts a run only after no file activity has occurred for the specified amount of time. - This makes it possible to wait for a batch of incoming files to arrive before triggering a run. The - minimum allowed value is 60 seconds. -github.com/databricks/databricks-sdk-go/service/jobs.ForEachTask: - "concurrency": - "description": |- - An optional maximum allowed number of concurrent runs of the task. - Set this value if you want to be able to execute multiple runs of the task concurrently. - "inputs": - "description": |- - Array for task to iterate on. This can be a JSON string or a reference to - an array parameter. - "task": - "description": |- - Configuration for the task that will be run for each element in the array -github.com/databricks/databricks-sdk-go/service/jobs.Format: - "_": - "enum": - - |- - SINGLE_TASK - - |- - MULTI_TASK -github.com/databricks/databricks-sdk-go/service/jobs.GenAiComputeTask: - "command": - "description": |- - Command launcher to run the actual script, e.g. bash, python etc. - "compute": {} - "dl_runtime_image": - "description": |- - Runtime image - "mlflow_experiment_name": - "description": |- - Optional string containing the name of the MLflow experiment to log the run to. If name is not - found, backend will create the mlflow experiment using the name. - "source": - "description": |- - Optional location type of the training script. 
When set to `WORKSPACE`, the script will be retrieved from the local Databricks workspace. When set to `GIT`, the script will be retrieved from a Git repository - defined in `git_source`. If the value is empty, the task will use `GIT` if `git_source` is defined and `WORKSPACE` otherwise. - * `WORKSPACE`: Script is located in Databricks workspace. - * `GIT`: Script is located in cloud Git provider. - "training_script_path": - "description": |- - The training script file path to be executed. Cloud file URIs (such as dbfs:/, s3:/, adls:/, gcs:/) and workspace paths are supported. For python files stored in the Databricks workspace, the path must be absolute and begin with `/`. For files stored in a remote repository, the path must be relative. This field is required. - "yaml_parameters": - "description": |- - Optional string containing model parameters passed to the training script in yaml format. - If present, then the content in yaml_parameters_file_path will be ignored. - "yaml_parameters_file_path": - "description": |- - Optional path to a YAML file containing model parameters passed to the training script. -github.com/databricks/databricks-sdk-go/service/jobs.GitProvider: - "_": - "enum": - - |- - gitHub - - |- - bitbucketCloud - - |- - azureDevOpsServices - - |- - gitHubEnterprise - - |- - bitbucketServer - - |- - gitLab - - |- - gitLabEnterpriseEdition - - |- - awsCodeCommit -github.com/databricks/databricks-sdk-go/service/jobs.GitSnapshot: - "_": - "description": |- - Read-only state of the remote repository at the time the job was run. This field is only included on job runs. - "used_commit": - "description": |- - Commit that was used to execute the run. If git_branch was specified, this points to the HEAD of the branch at the time of the run; if git_tag was specified, this points to the commit the tag points to. 
-github.com/databricks/databricks-sdk-go/service/jobs.GitSource: - "_": - "description": |- - An optional specification for a remote Git repository containing the source code used by tasks. Version-controlled source code is supported by notebook, dbt, Python script, and SQL File tasks. - - If `git_source` is set, these tasks retrieve the file from the remote repository by default. However, this behavior can be overridden by setting `source` to `WORKSPACE` on the task. - - Note: dbt and SQL File tasks support only version-controlled sources. If dbt or SQL File tasks are used, `git_source` must be defined on the job. - "git_branch": - "description": |- - Name of the branch to be checked out and used by this job. This field cannot be specified in conjunction with git_tag or git_commit. - "git_commit": - "description": |- - Commit to be checked out and used by this job. This field cannot be specified in conjunction with git_branch or git_tag. - "git_provider": - "description": |- - Unique identifier of the service used to host the Git repository. The value is case insensitive. - "git_snapshot": - "description": |- - Read-only state of the remote repository at the time the job was run. This field is only included on job runs. - "git_tag": - "description": |- - Name of the tag to be checked out and used by this job. This field cannot be specified in conjunction with git_branch or git_commit. - "git_url": - "description": |- - URL of the repository to be cloned by this job. - "job_source": - "description": |- - The source of the job specification in the remote repository when the job is source controlled. - "deprecation_message": |- - This field is deprecated - "x-databricks-preview": |- - PRIVATE - "sparse_checkout": - "x-databricks-preview": |- - PRIVATE -github.com/databricks/databricks-sdk-go/service/jobs.JobCluster: - "job_cluster_key": - "description": |- - A unique name for the job cluster. This field is required and must be unique within the job. 
- `JobTaskSettings` may refer to this field to determine which cluster to launch for the task execution. - "new_cluster": - "description": |- - If new_cluster, a description of a cluster that is created for each task. -github.com/databricks/databricks-sdk-go/service/jobs.JobDeployment: - "kind": - "description": |- - The kind of deployment that manages the job. - - * `BUNDLE`: The job is managed by Databricks Asset Bundle. - * `SYSTEM_MANAGED`: The job is managed by Databricks and is read-only. - "metadata_file_path": - "description": |- - Path of the file that contains deployment metadata. -github.com/databricks/databricks-sdk-go/service/jobs.JobDeploymentKind: - "_": - "description": |- - * `BUNDLE`: The job is managed by Databricks Asset Bundle. - * `SYSTEM_MANAGED`: The job is managed by Databricks and is read-only. - "enum": - - |- - BUNDLE - - |- - SYSTEM_MANAGED -github.com/databricks/databricks-sdk-go/service/jobs.JobEditMode: - "_": - "description": |- - Edit mode of the job. - - * `UI_LOCKED`: The job is in a locked UI state and cannot be modified. - * `EDITABLE`: The job is in an editable state and can be modified. - "enum": - - |- - UI_LOCKED - - |- - EDITABLE -github.com/databricks/databricks-sdk-go/service/jobs.JobEmailNotifications: - "no_alert_for_skipped_runs": - "description": |- - If true, do not send email to recipients specified in `on_failure` if the run is skipped. - This field is `deprecated`. Please use the `notification_settings.no_alert_for_skipped_runs` field. - "deprecation_message": |- - This field is deprecated - "on_duration_warning_threshold_exceeded": - "description": |- - A list of email addresses to be notified when the duration of a run exceeds the threshold specified for the `RUN_DURATION_SECONDS` metric in the `health` field. If no rule for the `RUN_DURATION_SECONDS` metric is specified in the `health` field for the job, notifications are not sent. 
- "on_failure": - "description": |- - A list of email addresses to be notified when a run unsuccessfully completes. A run is considered to have completed unsuccessfully if it ends with an `INTERNAL_ERROR` `life_cycle_state` or a `FAILED`, or `TIMED_OUT` result_state. If this is not specified on job creation, reset, or update the list is empty, and notifications are not sent. - "on_start": - "description": |- - A list of email addresses to be notified when a run begins. If not specified on job creation, reset, or update, the list is empty, and notifications are not sent. - "on_streaming_backlog_exceeded": - "description": |- - A list of email addresses to notify when any streaming backlog thresholds are exceeded for any stream. - Streaming backlog thresholds can be set in the `health` field using the following metrics: `STREAMING_BACKLOG_BYTES`, `STREAMING_BACKLOG_RECORDS`, `STREAMING_BACKLOG_SECONDS`, or `STREAMING_BACKLOG_FILES`. - Alerting is based on the 10-minute average of these metrics. If the issue persists, notifications are resent every 30 minutes. - "on_success": - "description": |- - A list of email addresses to be notified when a run successfully completes. A run is considered to have completed successfully if it ends with a `TERMINATED` `life_cycle_state` and a `SUCCESS` result_state. If not specified on job creation, reset, or update, the list is empty, and notifications are not sent. -github.com/databricks/databricks-sdk-go/service/jobs.JobEnvironment: - "environment_key": - "description": |- - The key of an environment. It has to be unique within a job. - "spec": - "description": |- - The environment entity used to preserve serverless environment side panel, jobs' environment for non-notebook task, and DLT's environment for classic and serverless pipelines. - In this minimal environment spec, only pip dependencies are supported. 
-github.com/databricks/databricks-sdk-go/service/jobs.JobNotificationSettings: - "no_alert_for_canceled_runs": - "description": |- - If true, do not send notifications to recipients specified in `on_failure` if the run is canceled. - "no_alert_for_skipped_runs": - "description": |- - If true, do not send notifications to recipients specified in `on_failure` if the run is skipped. -github.com/databricks/databricks-sdk-go/service/jobs.JobParameterDefinition: - "default": - "description": |- - Default value of the parameter. - "name": - "description": |- - The name of the defined parameter. May only contain alphanumeric characters, `_`, `-`, and `.` -github.com/databricks/databricks-sdk-go/service/jobs.JobRunAs: - "_": - "description": |- - Write-only setting. Specifies the user or service principal that the job runs as. If not specified, the job runs as the user who created the job. - - Either `user_name` or `service_principal_name` should be specified. If not, an error is thrown. - "group_name": - "description": |- - Group name of an account group assigned to the workspace. Setting this field requires being a member of the group. - "x-databricks-preview": |- - PRIVATE - "service_principal_name": - "description": |- - Application ID of an active service principal. Setting this field requires the `servicePrincipal/user` role. - "user_name": - "description": |- - The email of an active workspace user. Non-admin users can only set this field to their own email. -github.com/databricks/databricks-sdk-go/service/jobs.JobSource: - "_": - "description": |- - The source of the job specification in the remote repository when the job is source controlled. - "dirty_state": - "description": |- - Dirty state indicates the job is not fully synced with the job specification in the remote repository. - - Possible values are: - * `NOT_SYNCED`: The job is not yet synced with the remote job specification. Import the remote job specification from UI to make the job fully synced. 
- * `DISCONNECTED`: The job is temporary disconnected from the remote job specification and is allowed for live edit. Import the remote job specification again from UI to make the job fully synced. - "import_from_git_branch": - "description": |- - Name of the branch which the job is imported from. - "job_config_path": - "description": |- - Path of the job YAML file that contains the job specification. -github.com/databricks/databricks-sdk-go/service/jobs.JobSourceDirtyState: - "_": - "description": |- - Dirty state indicates the job is not fully synced with the job specification - in the remote repository. - - Possible values are: - * `NOT_SYNCED`: The job is not yet synced with the remote job specification. Import the remote job specification from UI to make the job fully synced. - * `DISCONNECTED`: The job is temporary disconnected from the remote job specification and is allowed for live edit. Import the remote job specification again from UI to make the job fully synced. - "enum": - - |- - NOT_SYNCED - - |- - DISCONNECTED -github.com/databricks/databricks-sdk-go/service/jobs.JobsHealthMetric: - "_": - "description": |- - Specifies the health metric that is being evaluated for a particular health rule. - - * `RUN_DURATION_SECONDS`: Expected total time for a run in seconds. - * `STREAMING_BACKLOG_BYTES`: An estimate of the maximum bytes of data waiting to be consumed across all streams. This metric is in Public Preview. - * `STREAMING_BACKLOG_RECORDS`: An estimate of the maximum offset lag across all streams. This metric is in Public Preview. - * `STREAMING_BACKLOG_SECONDS`: An estimate of the maximum consumer delay across all streams. This metric is in Public Preview. - * `STREAMING_BACKLOG_FILES`: An estimate of the maximum number of outstanding files across all streams. This metric is in Public Preview. 
- "enum": - - |- - RUN_DURATION_SECONDS - - |- - STREAMING_BACKLOG_BYTES - - |- - STREAMING_BACKLOG_RECORDS - - |- - STREAMING_BACKLOG_SECONDS - - |- - STREAMING_BACKLOG_FILES -github.com/databricks/databricks-sdk-go/service/jobs.JobsHealthOperator: - "_": - "description": |- - Specifies the operator used to compare the health metric value with the specified threshold. - "enum": - - |- - GREATER_THAN -github.com/databricks/databricks-sdk-go/service/jobs.JobsHealthRule: - "metric": - "description": |- - Specifies the health metric that is being evaluated for a particular health rule. - - * `RUN_DURATION_SECONDS`: Expected total time for a run in seconds. - * `STREAMING_BACKLOG_BYTES`: An estimate of the maximum bytes of data waiting to be consumed across all streams. This metric is in Public Preview. - * `STREAMING_BACKLOG_RECORDS`: An estimate of the maximum offset lag across all streams. This metric is in Public Preview. - * `STREAMING_BACKLOG_SECONDS`: An estimate of the maximum consumer delay across all streams. This metric is in Public Preview. - * `STREAMING_BACKLOG_FILES`: An estimate of the maximum number of outstanding files across all streams. This metric is in Public Preview. - "op": - "description": |- - Specifies the operator used to compare the health metric value with the specified threshold. - "value": - "description": |- - Specifies the threshold value that the health metric should obey to satisfy the health rule. -github.com/databricks/databricks-sdk-go/service/jobs.JobsHealthRules: - "_": - "description": |- - An optional set of health rules that can be defined for this job. - "rules": {} -github.com/databricks/databricks-sdk-go/service/jobs.ModelTriggerConfiguration: - "aliases": - "description": |- - Aliases of the model versions to monitor. Can only be used in conjunction with condition MODEL_ALIAS_SET. - "condition": - "description": |- - The condition based on which to trigger a job run. 
- "min_time_between_triggers_seconds": - "description": |- - If set, the trigger starts a run only after the specified amount of time has passed since - the last time the trigger fired. The minimum allowed value is 60 seconds. - "securable_name": - "description": |- - Name of the securable to monitor ("mycatalog.myschema.mymodel" in the case of model-level triggers, - "mycatalog.myschema" in the case of schema-level triggers) or empty in the case of metastore-level triggers. - "wait_after_last_change_seconds": - "description": |- - If set, the trigger starts a run only after no model updates have occurred for the specified time - and can be used to wait for a series of model updates before triggering a run. The - minimum allowed value is 60 seconds. -github.com/databricks/databricks-sdk-go/service/jobs.ModelTriggerConfigurationCondition: - "_": - "enum": - - |- - MODEL_CREATED - - |- - MODEL_VERSION_READY - - |- - MODEL_ALIAS_SET -github.com/databricks/databricks-sdk-go/service/jobs.NotebookTask: - "base_parameters": - "description": |- - Base parameters to be used for each run of this job. If the run is initiated by a call to :method:jobs/run - Now with parameters specified, the two parameters maps are merged. If the same key is specified in - `base_parameters` and in `run-now`, the value from `run-now` is used. - Use [Task parameter variables](https://docs.databricks.com/jobs.html#parameter-variables) to set parameters containing information about job runs. - - If the notebook takes a parameter that is not specified in the job’s `base_parameters` or the `run-now` override parameters, - the default value from the notebook is used. - - Retrieve these parameters in a notebook using [dbutils.widgets.get](https://docs.databricks.com/dev-tools/databricks-utils.html#dbutils-widgets). - - The JSON representation of this field cannot exceed 1MB. - "notebook_path": - "description": |- - The path of the notebook to be run in the Databricks workspace or remote repository. 
- For notebooks stored in the Databricks workspace, the path must be absolute and begin with a slash. - For notebooks stored in a remote repository, the path must be relative. This field is required. - "source": - "description": |- - Optional location type of the notebook. When set to `WORKSPACE`, the notebook will be retrieved from the local Databricks workspace. When set to `GIT`, the notebook will be retrieved from a Git repository - defined in `git_source`. If the value is empty, the task will use `GIT` if `git_source` is defined and `WORKSPACE` otherwise. - * `WORKSPACE`: Notebook is located in Databricks workspace. - * `GIT`: Notebook is located in cloud Git provider. - "warehouse_id": - "description": |- - Optional `warehouse_id` to run the notebook on a SQL warehouse. Classic SQL warehouses are NOT supported, please use serverless or pro SQL warehouses. - - Note that SQL warehouses only support SQL cells; if the notebook contains non-SQL cells, the run will fail. -github.com/databricks/databricks-sdk-go/service/jobs.PauseStatus: - "_": - "enum": - - |- - UNPAUSED - - |- - PAUSED -github.com/databricks/databricks-sdk-go/service/jobs.PerformanceTarget: - "_": - "description": |- - PerformanceTarget defines how performant (lower latency) or cost efficient the execution of run on serverless compute should be. - The performance mode on the job or pipeline should map to a performance setting that is passed to Cluster Manager - (see cluster-common PerformanceTarget). - "enum": - - |- - PERFORMANCE_OPTIMIZED - - |- - STANDARD -github.com/databricks/databricks-sdk-go/service/jobs.PeriodicTriggerConfiguration: - "interval": - "description": |- - The interval at which the trigger should run. - "unit": - "description": |- - The unit of time for the interval. 
-github.com/databricks/databricks-sdk-go/service/jobs.PeriodicTriggerConfigurationTimeUnit: - "_": - "enum": - - |- - HOURS - - |- - DAYS - - |- - WEEKS -github.com/databricks/databricks-sdk-go/service/jobs.PipelineParams: - "full_refresh": - "description": |- - If true, triggers a full refresh on the delta live table. -github.com/databricks/databricks-sdk-go/service/jobs.PipelineTask: - "full_refresh": - "description": |- - If true, triggers a full refresh on the delta live table. - "pipeline_id": - "description": |- - The full name of the pipeline task to execute. -github.com/databricks/databricks-sdk-go/service/jobs.PowerBiModel: - "authentication_method": - "description": |- - How the published Power BI model authenticates to Databricks - "model_name": - "description": |- - The name of the Power BI model - "overwrite_existing": - "description": |- - Whether to overwrite existing Power BI models - "storage_mode": - "description": |- - The default storage mode of the Power BI model - "workspace_name": - "description": |- - The name of the Power BI workspace of the model -github.com/databricks/databricks-sdk-go/service/jobs.PowerBiTable: - "catalog": - "description": |- - The catalog name in Databricks - "name": - "description": |- - The table name in Databricks - "schema": - "description": |- - The schema name in Databricks - "storage_mode": - "description": |- - The Power BI storage mode of the table -github.com/databricks/databricks-sdk-go/service/jobs.PowerBiTask: - "connection_resource_name": - "description": |- - The resource name of the UC connection to authenticate from Databricks to Power BI - "power_bi_model": - "description": |- - The semantic model to update - "refresh_after_update": - "description": |- - Whether the model should be refreshed after the update - "tables": - "description": |- - The tables to be exported to Power BI - "warehouse_id": - "description": |- - The SQL warehouse ID to use as the Power BI data source 
-github.com/databricks/databricks-sdk-go/service/jobs.PythonWheelTask: - "entry_point": - "description": |- - Named entry point to use, if it does not exist in the metadata of the package it executes the function from the package directly using `$packageName.$entryPoint()` - "named_parameters": - "description": |- - Command-line parameters passed to Python wheel task in the form of `["--name=task", "--data=dbfs:/path/to/data.json"]`. Leave it empty if `parameters` is not null. - "package_name": - "description": |- - Name of the package to execute - "parameters": - "description": |- - Command-line parameters passed to Python wheel task. Leave it empty if `named_parameters` is not null. -github.com/databricks/databricks-sdk-go/service/jobs.QueueSettings: - "enabled": - "description": |- - If true, enable queueing for the job. This is a required field. -github.com/databricks/databricks-sdk-go/service/jobs.RunIf: - "_": - "description": |- - An optional value indicating the condition that determines whether the task should be run once its dependencies have been completed. When omitted, defaults to `ALL_SUCCESS`. 
- - Possible values are: - * `ALL_SUCCESS`: All dependencies have executed and succeeded - * `AT_LEAST_ONE_SUCCESS`: At least one dependency has succeeded - * `NONE_FAILED`: None of the dependencies have failed and at least one was executed - * `ALL_DONE`: All dependencies have been completed - * `AT_LEAST_ONE_FAILED`: At least one dependency failed - * `ALL_FAILED`: ALl dependencies have failed - "enum": - - |- - ALL_SUCCESS - - |- - ALL_DONE - - |- - NONE_FAILED - - |- - AT_LEAST_ONE_SUCCESS - - |- - ALL_FAILED - - |- - AT_LEAST_ONE_FAILED -github.com/databricks/databricks-sdk-go/service/jobs.RunJobTask: - "dbt_commands": - "description": |- - An array of commands to execute for jobs with the dbt task, for example `"dbt_commands": ["dbt deps", "dbt seed", "dbt deps", "dbt seed", "dbt run"]` - - ⚠ **Deprecation note** Use [job parameters](https://docs.databricks.com/jobs/job-parameters.html#job-parameter-pushdown) to pass information down to tasks. - "deprecation_message": |- - This field is deprecated - "x-databricks-preview": |- - PRIVATE - "jar_params": - "description": |- - A list of parameters for jobs with Spark JAR tasks, for example `"jar_params": ["john doe", "35"]`. - The parameters are used to invoke the main function of the main class specified in the Spark JAR task. - If not specified upon `run-now`, it defaults to an empty list. - jar_params cannot be specified in conjunction with notebook_params. - The JSON representation of this field (for example `{"jar_params":["john doe","35"]}`) cannot exceed 10,000 bytes. - - ⚠ **Deprecation note** Use [job parameters](https://docs.databricks.com/jobs/job-parameters.html#job-parameter-pushdown) to pass information down to tasks. - "deprecation_message": |- - This field is deprecated - "x-databricks-preview": |- - PRIVATE - "job_id": - "description": |- - ID of the job to trigger. - "job_parameters": - "description": |- - Job-level parameters used to trigger the job. 
- "notebook_params": - "description": |- - A map from keys to values for jobs with notebook task, for example `"notebook_params": {"name": "john doe", "age": "35"}`. - The map is passed to the notebook and is accessible through the [dbutils.widgets.get](https://docs.databricks.com/dev-tools/databricks-utils.html) function. - - If not specified upon `run-now`, the triggered run uses the job’s base parameters. - - notebook_params cannot be specified in conjunction with jar_params. - - ⚠ **Deprecation note** Use [job parameters](https://docs.databricks.com/jobs/job-parameters.html#job-parameter-pushdown) to pass information down to tasks. - - The JSON representation of this field (for example `{"notebook_params":{"name":"john doe","age":"35"}}`) cannot exceed 10,000 bytes. - "deprecation_message": |- - This field is deprecated - "x-databricks-preview": |- - PRIVATE - "pipeline_params": - "description": |- - Controls whether the pipeline should perform a full refresh - "python_named_params": - "deprecation_message": |- - This field is deprecated - "x-databricks-preview": |- - PRIVATE - "python_params": - "description": |- - A list of parameters for jobs with Python tasks, for example `"python_params": ["john doe", "35"]`. - The parameters are passed to Python file as command-line parameters. If specified upon `run-now`, it would overwrite - the parameters specified in job setting. The JSON representation of this field (for example `{"python_params":["john doe","35"]}`) - cannot exceed 10,000 bytes. - - ⚠ **Deprecation note** Use [job parameters](https://docs.databricks.com/jobs/job-parameters.html#job-parameter-pushdown) to pass information down to tasks. - - Important - - These parameters accept only Latin characters (ASCII character set). Using non-ASCII characters returns an error. - Examples of invalid, non-ASCII characters are Chinese, Japanese kanjis, and emojis. 
- "deprecation_message": |- - This field is deprecated - "x-databricks-preview": |- - PRIVATE - "spark_submit_params": - "description": |- - A list of parameters for jobs with spark submit task, for example `"spark_submit_params": ["--class", "org.apache.spark.examples.SparkPi"]`. - The parameters are passed to spark-submit script as command-line parameters. If specified upon `run-now`, it would overwrite the - parameters specified in job setting. The JSON representation of this field (for example `{"python_params":["john doe","35"]}`) - cannot exceed 10,000 bytes. - - ⚠ **Deprecation note** Use [job parameters](https://docs.databricks.com/jobs/job-parameters.html#job-parameter-pushdown) to pass information down to tasks. - - Important - - These parameters accept only Latin characters (ASCII character set). Using non-ASCII characters returns an error. - Examples of invalid, non-ASCII characters are Chinese, Japanese kanjis, and emojis. - "deprecation_message": |- - This field is deprecated - "x-databricks-preview": |- - PRIVATE - "sql_params": - "description": |- - A map from keys to values for jobs with SQL task, for example `"sql_params": {"name": "john doe", "age": "35"}`. The SQL alert task does not support custom parameters. - - ⚠ **Deprecation note** Use [job parameters](https://docs.databricks.com/jobs/job-parameters.html#job-parameter-pushdown) to pass information down to tasks. - "deprecation_message": |- - This field is deprecated - "x-databricks-preview": |- - PRIVATE -github.com/databricks/databricks-sdk-go/service/jobs.Source: - "_": - "description": |- - Optional location type of the SQL file. When set to `WORKSPACE`, the SQL file will be retrieved\ - from the local Databricks workspace. When set to `GIT`, the SQL file will be retrieved from a Git repository - defined in `git_source`. If the value is empty, the task will use `GIT` if `git_source` is defined and `WORKSPACE` otherwise. - - * `WORKSPACE`: SQL file is located in Databricks workspace. 
- * `GIT`: SQL file is located in cloud Git provider. - "enum": - - |- - WORKSPACE - - |- - GIT -github.com/databricks/databricks-sdk-go/service/jobs.SparkJarTask: - "jar_uri": - "description": |- - Deprecated since 04/2016. For classic compute, provide a `jar` through the `libraries` field instead. For serverless compute, provide a `jar` though the `java_dependencies` field inside the `environments` list. - - See the examples of classic and serverless compute usage at the top of the page. - "deprecation_message": |- - This field is deprecated - "main_class_name": - "description": |- - The full name of the class containing the main method to be executed. This class must be contained in a JAR provided as a library. - - The code must use `SparkContext.getOrCreate` to obtain a Spark context; otherwise, runs of the job fail. - "parameters": - "description": |- - Parameters passed to the main method. - - Use [Task parameter variables](https://docs.databricks.com/jobs.html#parameter-variables) to set parameters containing information about job runs. - "run_as_repl": - "description": |- - Deprecated. A value of `false` is no longer supported. - "deprecation_message": |- - This field is deprecated -github.com/databricks/databricks-sdk-go/service/jobs.SparkPythonTask: - "parameters": - "description": |- - Command line parameters passed to the Python file. - - Use [Task parameter variables](https://docs.databricks.com/jobs.html#parameter-variables) to set parameters containing information about job runs. - "python_file": - "description": |- - The Python file to be executed. Cloud file URIs (such as dbfs:/, s3:/, adls:/, gcs:/) and workspace paths are supported. For python files stored in the Databricks workspace, the path must be absolute and begin with `/`. For files stored in a remote repository, the path must be relative. This field is required. - "source": - "description": |- - Optional location type of the Python file. 
When set to `WORKSPACE` or not specified, the file will be retrieved from the local - Databricks workspace or cloud location (if the `python_file` has a URI format). When set to `GIT`, - the Python file will be retrieved from a Git repository defined in `git_source`. - - * `WORKSPACE`: The Python file is located in a Databricks workspace or at a cloud filesystem URI. - * `GIT`: The Python file is located in a remote Git repository. -github.com/databricks/databricks-sdk-go/service/jobs.SparkSubmitTask: - "parameters": - "description": |- - Command-line parameters passed to spark submit. - - Use [Task parameter variables](https://docs.databricks.com/jobs.html#parameter-variables) to set parameters containing information about job runs. -github.com/databricks/databricks-sdk-go/service/jobs.SparseCheckout: - "patterns": - "description": |- - List of patterns to include for sparse checkout. -github.com/databricks/databricks-sdk-go/service/jobs.SqlTask: - "alert": - "description": |- - If alert, indicates that this job must refresh a SQL alert. - "dashboard": - "description": |- - If dashboard, indicates that this job must refresh a SQL dashboard. - "file": - "description": |- - If file, indicates that this job runs a SQL file in a remote Git repository. - "parameters": - "description": |- - Parameters to be used for each run of this job. The SQL alert task does not support custom parameters. - "query": - "description": |- - If query, indicates that this job must execute a SQL query. - "warehouse_id": - "description": |- - The canonical identifier of the SQL warehouse. Recommended to use with serverless or pro SQL warehouses. Classic SQL warehouses are only supported for SQL alert, dashboard and query tasks and are limited to scheduled single-task jobs. -github.com/databricks/databricks-sdk-go/service/jobs.SqlTaskAlert: - "alert_id": - "description": |- - The canonical identifier of the SQL alert. 
- "pause_subscriptions": - "description": |- - If true, the alert notifications are not sent to subscribers. - "subscriptions": - "description": |- - If specified, alert notifications are sent to subscribers. -github.com/databricks/databricks-sdk-go/service/jobs.SqlTaskDashboard: - "custom_subject": - "description": |- - Subject of the email sent to subscribers of this task. - "dashboard_id": - "description": |- - The canonical identifier of the SQL dashboard. - "pause_subscriptions": - "description": |- - If true, the dashboard snapshot is not taken, and emails are not sent to subscribers. - "subscriptions": - "description": |- - If specified, dashboard snapshots are sent to subscriptions. -github.com/databricks/databricks-sdk-go/service/jobs.SqlTaskFile: - "path": - "description": |- - Path of the SQL file. Must be relative if the source is a remote Git repository and absolute for workspace paths. - "source": - "description": |- - Optional location type of the SQL file. When set to `WORKSPACE`, the SQL file will be retrieved - from the local Databricks workspace. When set to `GIT`, the SQL file will be retrieved from a Git repository - defined in `git_source`. If the value is empty, the task will use `GIT` if `git_source` is defined and `WORKSPACE` otherwise. - - * `WORKSPACE`: SQL file is located in Databricks workspace. - * `GIT`: SQL file is located in cloud Git provider. -github.com/databricks/databricks-sdk-go/service/jobs.SqlTaskQuery: - "query_id": - "description": |- - The canonical identifier of the SQL query. -github.com/databricks/databricks-sdk-go/service/jobs.SqlTaskSubscription: - "destination_id": - "description": |- - The canonical identifier of the destination to receive email notification. This parameter is mutually exclusive with user_name. You cannot set both destination_id and user_name for subscription notifications. - "user_name": - "description": |- - The user name to receive the subscription email. 
This parameter is mutually exclusive with destination_id. You cannot set both destination_id and user_name for subscription notifications. -github.com/databricks/databricks-sdk-go/service/jobs.StorageMode: - "_": - "enum": - - |- - DIRECT_QUERY - - |- - IMPORT - - |- - DUAL -github.com/databricks/databricks-sdk-go/service/jobs.Subscription: - "custom_subject": - "description": |- - Optional: Allows users to specify a custom subject line on the email sent - to subscribers. - "paused": - "description": |- - When true, the subscription will not send emails. - "subscribers": - "description": |- - The list of subscribers to send the snapshot of the dashboard to. -github.com/databricks/databricks-sdk-go/service/jobs.SubscriptionSubscriber: - "destination_id": - "description": |- - A snapshot of the dashboard will be sent to the destination when the `destination_id` field is present. - "user_name": - "description": |- - A snapshot of the dashboard will be sent to the user's email when the `user_name` field is present. -github.com/databricks/databricks-sdk-go/service/jobs.TableUpdateTriggerConfiguration: - "condition": - "description": |- - The table(s) condition based on which to trigger a job run. - "min_time_between_triggers_seconds": - "description": |- - If set, the trigger starts a run only after the specified amount of time has passed since - the last time the trigger fired. The minimum allowed value is 60 seconds. - "table_names": - "description": |- - A list of tables to monitor for changes. The table name must be in the format `catalog_name.schema_name.table_name`. - "wait_after_last_change_seconds": - "description": |- - If set, the trigger starts a run only after no table updates have occurred for the specified time - and can be used to wait for a series of table updates before triggering a run. The - minimum allowed value is 60 seconds. 
-github.com/databricks/databricks-sdk-go/service/jobs.Task: - "clean_rooms_notebook_task": - "description": |- - The task runs a [clean rooms](https://docs.databricks.com/clean-rooms/index.html) notebook - when the `clean_rooms_notebook_task` field is present. - "compute": - "description": |- - Task level compute configuration. - "condition_task": - "description": |- - The task evaluates a condition that can be used to control the execution of other tasks when the `condition_task` field is present. - The condition task does not require a cluster to execute and does not support retries or notifications. - "dashboard_task": - "description": |- - The task refreshes a dashboard and sends a snapshot to subscribers. - "dbt_cloud_task": - "description": |- - Task type for dbt cloud, deprecated in favor of the new name dbt_platform_task - "deprecation_message": |- - This field is deprecated - "x-databricks-preview": |- - PRIVATE - "dbt_platform_task": - "x-databricks-preview": |- - PRIVATE - "dbt_task": - "description": |- - The task runs one or more dbt commands when the `dbt_task` field is present. The dbt task requires both Databricks SQL and the ability to use a serverless or a pro SQL warehouse. - "depends_on": - "description": |- - An optional array of objects specifying the dependency graph of the task. All tasks specified in this field must complete before executing this task. The task will run only if the `run_if` condition is true. - The key is `task_key`, and the value is the name assigned to the dependent task. - "description": - "description": |- - An optional description for this task. - "disable_auto_optimization": - "description": |- - An option to disable auto optimization in serverless - "disabled": - "description": |- - An optional flag to disable the task. If set to true, the task will not run even if it is part of a job. 
- "x-databricks-preview": |- - PRIVATE - "email_notifications": - "description": |- - An optional set of email addresses that is notified when runs of this task begin or complete as well as when this task is deleted. The default behavior is to not send any emails. - "environment_key": - "description": |- - The key that references an environment spec in a job. This field is required for Python script, Python wheel and dbt tasks when using serverless compute. - "existing_cluster_id": - "description": |- - If existing_cluster_id, the ID of an existing cluster that is used for all runs. - When running jobs or tasks on an existing cluster, you may need to manually restart - the cluster if it stops responding. We suggest running jobs and tasks on new clusters for - greater reliability - "for_each_task": - "description": |- - The task executes a nested task for every input provided when the `for_each_task` field is present. - "gen_ai_compute_task": - "x-databricks-preview": |- - PRIVATE - "health": - "description": |- - An optional set of health rules that can be defined for this job. - "job_cluster_key": - "description": |- - If job_cluster_key, this task is executed reusing the cluster specified in `job.settings.job_clusters`. - "libraries": - "description": |- - An optional list of libraries to be installed on the cluster. - The default value is an empty list. - "max_retries": - "description": |- - An optional maximum number of times to retry an unsuccessful run. A run is considered to be unsuccessful if it completes with the `FAILED` result_state or `INTERNAL_ERROR` `life_cycle_state`. The value `-1` means to retry indefinitely and the value `0` means to never retry. - "min_retry_interval_millis": - "description": |- - An optional minimal interval in milliseconds between the start of the failed run and the subsequent retry run. The default behavior is that unsuccessful runs are immediately retried. 
- "new_cluster": - "description": |- - If new_cluster, a description of a new cluster that is created for each run. - "notebook_task": - "description": |- - The task runs a notebook when the `notebook_task` field is present. - "notification_settings": - "description": |- - Optional notification settings that are used when sending notifications to each of the `email_notifications` and `webhook_notifications` for this task. - "pipeline_task": - "description": |- - The task triggers a pipeline update when the `pipeline_task` field is present. Only pipelines configured to use triggered more are supported. - "power_bi_task": - "description": |- - The task triggers a Power BI semantic model update when the `power_bi_task` field is present. - "python_wheel_task": - "description": |- - The task runs a Python wheel when the `python_wheel_task` field is present. - "retry_on_timeout": - "description": |- - An optional policy to specify whether to retry a job when it times out. The default behavior - is to not retry on timeout. - "run_if": - "description": |- - An optional value specifying the condition determining whether the task is run once its dependencies have been completed. - - * `ALL_SUCCESS`: All dependencies have executed and succeeded - * `AT_LEAST_ONE_SUCCESS`: At least one dependency has succeeded - * `NONE_FAILED`: None of the dependencies have failed and at least one was executed - * `ALL_DONE`: All dependencies have been completed - * `AT_LEAST_ONE_FAILED`: At least one dependency failed - * `ALL_FAILED`: ALl dependencies have failed - "run_job_task": - "description": |- - The task triggers another job when the `run_job_task` field is present. - "spark_jar_task": - "description": |- - The task runs a JAR when the `spark_jar_task` field is present. - "spark_python_task": - "description": |- - The task runs a Python file when the `spark_python_task` field is present. 
- "spark_submit_task": - "description": |- - (Legacy) The task runs the spark-submit script when the spark_submit_task field is present. Databricks recommends using the spark_jar_task instead; see [Spark Submit task for jobs](/jobs/spark-submit). - "deprecation_message": |- - This field is deprecated - "sql_task": - "description": |- - The task runs a SQL query or file, or it refreshes a SQL alert or a legacy SQL dashboard when the `sql_task` field is present. - "task_key": - "description": |- - A unique name for the task. This field is used to refer to this task from other tasks. - This field is required and must be unique within its parent job. - On Update or Reset, this field is used to reference the tasks to be updated or reset. - "timeout_seconds": - "description": |- - An optional timeout applied to each run of this job task. A value of `0` means no timeout. - "webhook_notifications": - "description": |- - A collection of system notification IDs to notify when runs of this task begin or complete. The default behavior is to not send any system notifications. -github.com/databricks/databricks-sdk-go/service/jobs.TaskDependency: - "outcome": - "description": |- - Can only be specified on condition task dependencies. The outcome of the dependent task that must be met for this task to run. - "task_key": - "description": |- - The name of the task this task depends on. -github.com/databricks/databricks-sdk-go/service/jobs.TaskEmailNotifications: - "no_alert_for_skipped_runs": - "description": |- - If true, do not send email to recipients specified in `on_failure` if the run is skipped. - This field is `deprecated`. Please use the `notification_settings.no_alert_for_skipped_runs` field. - "deprecation_message": |- - This field is deprecated - "on_duration_warning_threshold_exceeded": - "description": |- - A list of email addresses to be notified when the duration of a run exceeds the threshold specified for the `RUN_DURATION_SECONDS` metric in the `health` field. 
If no rule for the `RUN_DURATION_SECONDS` metric is specified in the `health` field for the job, notifications are not sent. - "on_failure": - "description": |- - A list of email addresses to be notified when a run unsuccessfully completes. A run is considered to have completed unsuccessfully if it ends with an `INTERNAL_ERROR` `life_cycle_state` or a `FAILED`, or `TIMED_OUT` result_state. If this is not specified on job creation, reset, or update the list is empty, and notifications are not sent. - "on_start": - "description": |- - A list of email addresses to be notified when a run begins. If not specified on job creation, reset, or update, the list is empty, and notifications are not sent. - "on_streaming_backlog_exceeded": - "description": |- - A list of email addresses to notify when any streaming backlog thresholds are exceeded for any stream. - Streaming backlog thresholds can be set in the `health` field using the following metrics: `STREAMING_BACKLOG_BYTES`, `STREAMING_BACKLOG_RECORDS`, `STREAMING_BACKLOG_SECONDS`, or `STREAMING_BACKLOG_FILES`. - Alerting is based on the 10-minute average of these metrics. If the issue persists, notifications are resent every 30 minutes. - "on_success": - "description": |- - A list of email addresses to be notified when a run successfully completes. A run is considered to have completed successfully if it ends with a `TERMINATED` `life_cycle_state` and a `SUCCESS` result_state. If not specified on job creation, reset, or update, the list is empty, and notifications are not sent. -github.com/databricks/databricks-sdk-go/service/jobs.TaskNotificationSettings: - "alert_on_last_attempt": - "description": |- - If true, do not send notifications to recipients specified in `on_start` for the retried runs and do not send notifications to recipients specified in `on_failure` until the last retry of the run. 
- "no_alert_for_canceled_runs": - "description": |- - If true, do not send notifications to recipients specified in `on_failure` if the run is canceled. - "no_alert_for_skipped_runs": - "description": |- - If true, do not send notifications to recipients specified in `on_failure` if the run is skipped. -github.com/databricks/databricks-sdk-go/service/jobs.TaskRetryMode: - "_": - "description": |- - task retry mode of the continuous job - * NEVER: The failed task will not be retried. - * ON_FAILURE: Retry a failed task if at least one other task in the job is still running its first attempt. - When this condition is no longer met or the retry limit is reached, the job run is cancelled and a new run is started. - "enum": - - |- - NEVER - - |- - ON_FAILURE -github.com/databricks/databricks-sdk-go/service/jobs.TriggerSettings: - "file_arrival": - "description": |- - File arrival trigger settings. - "model": - "x-databricks-preview": |- - PRIVATE - "pause_status": - "description": |- - Whether this trigger is paused or not. - "periodic": - "description": |- - Periodic trigger settings. - "table_update": {} -github.com/databricks/databricks-sdk-go/service/jobs.Webhook: - "id": {} -github.com/databricks/databricks-sdk-go/service/jobs.WebhookNotifications: - "on_duration_warning_threshold_exceeded": - "description": |- - An optional list of system notification IDs to call when the duration of a run exceeds the threshold specified for the `RUN_DURATION_SECONDS` metric in the `health` field. A maximum of 3 destinations can be specified for the `on_duration_warning_threshold_exceeded` property. - "on_failure": - "description": |- - An optional list of system notification IDs to call when the run fails. A maximum of 3 destinations can be specified for the `on_failure` property. - "on_start": - "description": |- - An optional list of system notification IDs to call when the run starts. A maximum of 3 destinations can be specified for the `on_start` property. 
- "on_streaming_backlog_exceeded": - "description": |- - An optional list of system notification IDs to call when any streaming backlog thresholds are exceeded for any stream. - Streaming backlog thresholds can be set in the `health` field using the following metrics: `STREAMING_BACKLOG_BYTES`, `STREAMING_BACKLOG_RECORDS`, `STREAMING_BACKLOG_SECONDS`, or `STREAMING_BACKLOG_FILES`. - Alerting is based on the 10-minute average of these metrics. If the issue persists, notifications are resent every 30 minutes. - A maximum of 3 destinations can be specified for the `on_streaming_backlog_exceeded` property. - "on_success": - "description": |- - An optional list of system notification IDs to call when the run completes successfully. A maximum of 3 destinations can be specified for the `on_success` property. -github.com/databricks/databricks-sdk-go/service/ml.ExperimentTag: - "_": - "description": |- - A tag for an experiment. - "key": - "description": |- - The tag key. - "value": - "description": |- - The tag value. -github.com/databricks/databricks-sdk-go/service/ml.ModelTag: - "_": - "description": |- - Tag for a registered model - "key": - "description": |- - The tag key. - "value": - "description": |- - The tag value. -github.com/databricks/databricks-sdk-go/service/pipelines.AutoFullRefreshPolicy: - "_": - "description": |- - Policy for auto full refresh. - "enabled": - "description": |- - (Required, Mutable) Whether to enable auto full refresh or not. - "min_interval_hours": - "description": |- - (Optional, Mutable) Specify the minimum interval in hours between the timestamp - at which a table was last full refreshed and the current timestamp for triggering auto full - If unspecified and autoFullRefresh is enabled then by default min_interval_hours is 24 hours. -github.com/databricks/databricks-sdk-go/service/pipelines.ConnectionParameters: - "source_catalog": - "description": |- - Source catalog for initial connection. 
- This is necessary for schema exploration in some database systems like Oracle, and optional but nice-to-have - in some other database systems like Postgres. - For Oracle databases, this maps to a service name. - "x-databricks-preview": |- - PRIVATE -github.com/databricks/databricks-sdk-go/service/pipelines.CronTrigger: - "quartz_cron_schedule": {} - "timezone_id": {} -github.com/databricks/databricks-sdk-go/service/pipelines.DayOfWeek: - "_": - "description": |- - Days of week in which the window is allowed to happen. - If not specified all days of the week will be used. - "enum": - - |- - MONDAY - - |- - TUESDAY - - |- - WEDNESDAY - - |- - THURSDAY - - |- - FRIDAY - - |- - SATURDAY - - |- - SUNDAY -github.com/databricks/databricks-sdk-go/service/pipelines.DeploymentKind: - "_": - "description": |- - The deployment method that manages the pipeline: - - BUNDLE: The pipeline is managed by a Databricks Asset Bundle. - "enum": - - |- - BUNDLE -github.com/databricks/databricks-sdk-go/service/pipelines.EventLogSpec: - "_": - "description": |- - Configurable event log parameters. - "catalog": - "description": |- - The UC catalog the event log is published under. - "name": - "description": |- - The name the event log is published to in UC. - "schema": - "description": |- - The UC schema the event log is published under. -github.com/databricks/databricks-sdk-go/service/pipelines.FileLibrary: - "path": - "description": |- - The absolute path of the source code. -github.com/databricks/databricks-sdk-go/service/pipelines.Filters: - "exclude": - "description": |- - Paths to exclude. - "include": - "description": |- - Paths to include. -github.com/databricks/databricks-sdk-go/service/pipelines.IngestionConfig: - "report": - "description": |- - Select a specific source report. - "schema": - "description": |- - Select all tables from a specific source schema. - "table": - "description": |- - Select a specific source table. 
-github.com/databricks/databricks-sdk-go/service/pipelines.IngestionGatewayPipelineDefinition: - "connection_id": - "description": |- - [Deprecated, use connection_name instead] Immutable. The Unity Catalog connection that this gateway pipeline uses to communicate with the source. - "deprecation_message": |- - This field is deprecated - "connection_name": - "description": |- - Immutable. The Unity Catalog connection that this gateway pipeline uses to communicate with the source. - "connection_parameters": - "description": |- - Optional, Internal. Parameters required to establish an initial connection with the source. - "x-databricks-preview": |- - PRIVATE - "gateway_storage_catalog": - "description": |- - Required, Immutable. The name of the catalog for the gateway pipeline's storage location. - "gateway_storage_name": - "description": |- - Optional. The Unity Catalog-compatible name for the gateway storage location. - This is the destination to use for the data that is extracted by the gateway. - Spark Declarative Pipelines system will automatically create the storage location under the catalog and schema. - "gateway_storage_schema": - "description": |- - Required, Immutable. The name of the schema for the gateway pipelines's storage location. -github.com/databricks/databricks-sdk-go/service/pipelines.IngestionPipelineDefinition: - "connection_name": - "description": |- - The Unity Catalog connection that this ingestion pipeline uses to communicate with the source. This is used with - both connectors for applications like Salesforce, Workday, and so on, and also database connectors like Oracle, - (connector_type = QUERY_BASED OR connector_type = CDC). - If connection name corresponds to database connectors like Oracle, and connector_type is not provided then - connector_type defaults to QUERY_BASED. If connector_type is passed as CDC we use Combined Cdc Managed Ingestion - pipeline. 
- Under certain conditions, this can be replaced with ingestion_gateway_id to change the connector to Cdc Managed - Ingestion Pipeline with Gateway pipeline. - "full_refresh_window": - "description": |- - (Optional) A window that specifies a set of time ranges for snapshot queries in CDC. - "ingest_from_uc_foreign_catalog": - "description": |- - Immutable. If set to true, the pipeline will ingest tables from the - UC foreign catalogs directly without the need to specify a UC connection or ingestion gateway. - The `source_catalog` fields in objects of IngestionConfig are interpreted as - the UC foreign catalogs to ingest from. - "x-databricks-preview": |- - PRIVATE - "ingestion_gateway_id": - "description": |- - Identifier for the gateway that is used by this ingestion pipeline to communicate with the source database. - This is used with CDC connectors to databases like SQL Server using a gateway pipeline (connector_type = CDC). - Under certain conditions, this can be replaced with connection_name to change the connector to Combined Cdc - Managed Ingestion Pipeline. - "netsuite_jar_path": - "description": |- - Netsuite only configuration. When the field is set for a netsuite connector, - the jar stored in the field will be validated and added to the classpath of - pipeline's cluster. - "x-databricks-preview": |- - PRIVATE - "objects": - "description": |- - Required. Settings specifying tables to replicate and the destination for the replicated tables. - "source_configurations": - "description": |- - Top-level source configurations - "source_type": - "description": |- - The type of the foreign source. - The source type will be inferred from the source connection or ingestion gateway. - This field is output only and will be ignored if provided. - "x-databricks-field-behaviors_output_only": |- - true - "table_configuration": - "description": |- - Configuration settings to control the ingestion of tables. These settings are applied to all tables in the pipeline. -? 
github.com/databricks/databricks-sdk-go/service/pipelines.IngestionPipelineDefinitionTableSpecificConfigQueryBasedConnectorConfig -: "_": - "description": |- - Configurations that are only applicable for query-based ingestion connectors. - "cursor_columns": - "description": |- - The names of the monotonically increasing columns in the source table that are used to enable - the table to be read and ingested incrementally through structured streaming. - The columns are allowed to have repeated values but have to be non-decreasing. - If the source data is merged into the destination (e.g., using SCD Type 1 or Type 2), these - columns will implicitly define the `sequence_by` behavior. You can still explicitly set - `sequence_by` to override this default. - "x-databricks-preview": |- - PRIVATE - "deletion_condition": - "description": |- - Specifies a SQL WHERE condition that specifies that the source row has been deleted. - This is sometimes referred to as "soft-deletes". - For example: "Operation = 'DELETE'" or "is_deleted = true". - This field is orthogonal to `hard_deletion_sync_interval_in_seconds`, - one for soft-deletes and the other for hard-deletes. - See also the hard_deletion_sync_min_interval_in_seconds field for - handling of "hard deletes" where the source rows are physically removed from the table. - "x-databricks-preview": |- - PRIVATE - "hard_deletion_sync_min_interval_in_seconds": - "description": |- - Specifies the minimum interval (in seconds) between snapshots on primary keys - for detecting and synchronizing hard deletions—i.e., rows that have been - physically removed from the source table. - This interval acts as a lower bound. If ingestion runs less frequently than - this value, hard deletion synchronization will align with the actual ingestion - frequency instead of happening more often. - If not set, hard deletion synchronization via snapshots is disabled. - This field is mutable and can be updated without triggering a full snapshot. 
- "x-databricks-preview": |- - PRIVATE -github.com/databricks/databricks-sdk-go/service/pipelines.IngestionPipelineDefinitionWorkdayReportParameters: - "incremental": - "description": |- - (Optional) Marks the report as incremental. - This field is deprecated and should not be used. Use `parameters` instead. The incremental behavior is now - controlled by the `parameters` field. - "deprecation_message": |- - This field is deprecated - "parameters": - "description": |- - Parameters for the Workday report. Each key represents the parameter name (e.g., "start_date", "end_date"), - and the corresponding value is a SQL-like expression used to compute the parameter value at runtime. - Example: - { - "start_date": "{ coalesce(current_offset(), date(\"2025-02-01\")) }", - "end_date": "{ current_date() - INTERVAL 1 DAY }" - } - "report_parameters": - "description": |- - (Optional) Additional custom parameters for Workday Report - This field is deprecated and should not be used. Use `parameters` instead. - "deprecation_message": |- - This field is deprecated -github.com/databricks/databricks-sdk-go/service/pipelines.IngestionPipelineDefinitionWorkdayReportParametersQueryKeyValue: - "key": - "description": |- - Key for the report parameter, can be a column name or other metadata - "value": - "description": |- - Value for the report parameter. - Possible values it can take are these sql functions: - 1. coalesce(current_offset(), date("YYYY-MM-DD")) -> if current_offset() is null, then the passed date, else current_offset() - 2. current_date() - 3. 
date_sub(current_date(), x) -> subtract x (some non-negative integer) days from current date -github.com/databricks/databricks-sdk-go/service/pipelines.IngestionSourceType: - "_": - "enum": - - |- - MYSQL - - |- - POSTGRESQL - - |- - SQLSERVER - - |- - SALESFORCE - - |- - BIGQUERY - - |- - NETSUITE - - |- - WORKDAY_RAAS - - |- - GA4_RAW_DATA - - |- - SERVICENOW - - |- - MANAGED_POSTGRESQL - - |- - ORACLE - - |- - TERADATA - - |- - SHAREPOINT - - |- - DYNAMICS365 - - |- - FOREIGN_CATALOG -github.com/databricks/databricks-sdk-go/service/pipelines.ManualTrigger: {} -github.com/databricks/databricks-sdk-go/service/pipelines.NotebookLibrary: - "path": - "description": |- - The absolute path of the source code. -github.com/databricks/databricks-sdk-go/service/pipelines.Notifications: - "alerts": - "description": |- - A list of alerts that trigger the sending of notifications to the configured - destinations. The supported alerts are: - - * `on-update-success`: A pipeline update completes successfully. - * `on-update-failure`: Each time a pipeline update fails. - * `on-update-fatal-failure`: A pipeline update fails with a non-retryable (fatal) error. - * `on-flow-failure`: A single data flow fails. - "email_recipients": - "description": |- - A list of email addresses notified when a configured alert is triggered. -github.com/databricks/databricks-sdk-go/service/pipelines.OperationTimeWindow: - "_": - "description": |- - Proto representing a window - "days_of_week": - "description": |- - Days of week in which the window is allowed to happen - If not specified all days of the week will be used. - "start_hour": - "description": |- - An integer between 0 and 23 denoting the start hour for the window in the 24-hour day. - "time_zone_id": - "description": |- - Time zone id of window. See https://docs.databricks.com/sql/language-manual/sql-ref-syntax-aux-conf-mgmt-set-timezone.html for details. - If not specified, UTC will be used. 
-github.com/databricks/databricks-sdk-go/service/pipelines.PathPattern: - "include": - "description": |- - The source code to include for pipelines -github.com/databricks/databricks-sdk-go/service/pipelines.PipelineCluster: - "apply_policy_default_values": - "description": |- - Note: This field won't be persisted. Only API users will check this field. - "autoscale": - "description": |- - Parameters needed in order to automatically scale clusters up and down based on load. - Note: autoscaling works best with DB runtime versions 3.0 or later. - "aws_attributes": - "description": |- - Attributes related to clusters running on Amazon Web Services. - If not specified at cluster creation, a set of default values will be used. - "azure_attributes": - "description": |- - Attributes related to clusters running on Microsoft Azure. - If not specified at cluster creation, a set of default values will be used. - "cluster_log_conf": - "description": |- - The configuration for delivering spark logs to a long-term storage destination. - Only dbfs destinations are supported. Only one destination can be specified - for one cluster. If the conf is given, the logs will be delivered to the destination every - `5 mins`. The destination of driver logs is `$destination/$clusterId/driver`, while - the destination of executor logs is `$destination/$clusterId/executor`. - "custom_tags": - "description": |- - Additional tags for cluster resources. Databricks will tag all cluster resources (e.g., AWS - instances and EBS volumes) with these tags in addition to `default_tags`. Notes: - - - Currently, Databricks allows at most 45 custom tags - - - Clusters can only reuse cloud resources if the resources' tags are a subset of the cluster tags - "driver_instance_pool_id": - "description": |- - The optional ID of the instance pool for the driver of the cluster belongs. - The pool cluster uses the instance pool with id (instance_pool_id) if the driver pool is not - assigned. 
- "driver_node_type_id": - "description": |- - The node type of the Spark driver. - Note that this field is optional; if unset, the driver node type will be set as the same value - as `node_type_id` defined above. - "enable_local_disk_encryption": - "description": |- - Whether to enable local disk encryption for the cluster. - "gcp_attributes": - "description": |- - Attributes related to clusters running on Google Cloud Platform. - If not specified at cluster creation, a set of default values will be used. - "init_scripts": - "description": |- - The configuration for storing init scripts. Any number of destinations can be specified. The scripts are executed sequentially in the order provided. If `cluster_log_conf` is specified, init script logs are sent to `//init_scripts`. - "instance_pool_id": - "description": |- - The optional ID of the instance pool to which the cluster belongs. - "label": - "description": |- - A label for the cluster specification, either `default` to configure the default cluster, or `maintenance` to configure the maintenance cluster. This field is optional. The default value is `default`. - "node_type_id": - "description": |- - This field encodes, through a single value, the resources available to each of - the Spark nodes in this cluster. For example, the Spark nodes can be provisioned - and optimized for memory or compute intensive workloads. A list of available node - types can be retrieved by using the :method:clusters/listNodeTypes API call. - "num_workers": - "description": |- - Number of worker nodes that this cluster should have. A cluster has one Spark Driver - and `num_workers` Executors for a total of `num_workers` + 1 Spark nodes. - - Note: When reading the properties of a cluster, this field reflects the desired number - of workers rather than the actual current number of workers. 
For instance, if a cluster - is resized from 5 to 10 workers, this field will immediately be updated to reflect - the target size of 10 workers, whereas the workers listed in `spark_info` will gradually - increase from 5 to 10 as the new nodes are provisioned. - "policy_id": - "description": |- - The ID of the cluster policy used to create the cluster if applicable. - "spark_conf": - "description": |- - An object containing a set of optional, user-specified Spark configuration key-value pairs. - See :method:clusters/create for more details. - "spark_env_vars": - "description": |- - An object containing a set of optional, user-specified environment variable key-value pairs. - Please note that key-value pair of the form (X,Y) will be exported as is (i.e., - `export X='Y'`) while launching the driver and workers. - - In order to specify an additional set of `SPARK_DAEMON_JAVA_OPTS`, we recommend appending - them to `$SPARK_DAEMON_JAVA_OPTS` as shown in the example below. This ensures that all - default databricks managed environmental variables are included as well. - - Example Spark environment variables: - `{"SPARK_WORKER_MEMORY": "28000m", "SPARK_LOCAL_DIRS": "/local_disk0"}` or - `{"SPARK_DAEMON_JAVA_OPTS": "$SPARK_DAEMON_JAVA_OPTS -Dspark.shuffle.service.enabled=true"}` - "ssh_public_keys": - "description": |- - SSH public key contents that will be added to each Spark node in this cluster. The - corresponding private keys can be used to login with the user name `ubuntu` on port `2200`. - Up to 10 keys can be specified. -github.com/databricks/databricks-sdk-go/service/pipelines.PipelineClusterAutoscale: - "max_workers": - "description": |- - The maximum number of workers to which the cluster can scale up when overloaded. `max_workers` must be strictly greater than `min_workers`. - "min_workers": - "description": |- - The minimum number of workers the cluster can scale down to when underutilized. 
- It is also the initial number of workers the cluster will have after creation. - "mode": - "description": |- - Databricks Enhanced Autoscaling optimizes cluster utilization by automatically - allocating cluster resources based on workload volume, with minimal impact to - the data processing latency of your pipelines. Enhanced Autoscaling is available - for `updates` clusters only. The legacy autoscaling feature is used for `maintenance` - clusters. -github.com/databricks/databricks-sdk-go/service/pipelines.PipelineClusterAutoscaleMode: - "_": - "description": |- - Databricks Enhanced Autoscaling optimizes cluster utilization by automatically - allocating cluster resources based on workload volume, with minimal impact to - the data processing latency of your pipelines. Enhanced Autoscaling is available - for `updates` clusters only. The legacy autoscaling feature is used for `maintenance` - clusters. - "enum": - - |- - ENHANCED - - |- - LEGACY -github.com/databricks/databricks-sdk-go/service/pipelines.PipelineDeployment: - "kind": - "description": |- - The deployment method that manages the pipeline. - "metadata_file_path": - "description": |- - The path to the file containing metadata about the deployment. -github.com/databricks/databricks-sdk-go/service/pipelines.PipelineLibrary: - "file": - "description": |- - The path to a file that defines a pipeline and is stored in the Databricks Repos. - "glob": - "description": |- - The unified field to include source codes. - Each entry can be a notebook path, a file path, or a folder path that ends `/**`. - This field cannot be used together with `notebook` or `file`. - "jar": - "description": |- - URI of the jar to be installed. Currently only DBFS is supported. - "x-databricks-preview": |- - PRIVATE - "maven": - "description": |- - Specification of a maven library to be installed. 
- "x-databricks-preview": |- - PRIVATE - "notebook": - "description": |- - The path to a notebook that defines a pipeline and is stored in the Databricks workspace. - "whl": - "description": |- - URI of the whl to be installed. - "deprecation_message": |- - This field is deprecated -github.com/databricks/databricks-sdk-go/service/pipelines.PipelineTrigger: - "cron": {} - "manual": {} -github.com/databricks/databricks-sdk-go/service/pipelines.PipelinesEnvironment: - "_": - "description": |- - The environment entity used to preserve serverless environment side panel, jobs' environment for non-notebook task, and DLT's environment for classic and serverless pipelines. - In this minimal environment spec, only pip dependencies are supported. - "dependencies": - "description": |- - List of pip dependencies, as supported by the version of pip in this environment. - Each dependency is a pip requirement file line https://pip.pypa.io/en/stable/reference/requirements-file-format/ - Allowed dependency could be , , (WSFS or Volumes in Databricks), -github.com/databricks/databricks-sdk-go/service/pipelines.PostgresCatalogConfig: - "_": - "description": |- - PG-specific catalog-level configuration parameters - "slot_config": - "description": |- - Optional. The Postgres slot configuration to use for logical replication -github.com/databricks/databricks-sdk-go/service/pipelines.PostgresSlotConfig: - "_": - "description": |- - PostgresSlotConfig contains the configuration for a Postgres logical replication slot - "publication_name": - "description": |- - The name of the publication to use for the Postgres source - "slot_name": - "description": |- - The name of the logical replication slot to use for the Postgres source -github.com/databricks/databricks-sdk-go/service/pipelines.ReportSpec: - "destination_catalog": - "description": |- - Required. Destination catalog to store table. - "destination_schema": - "description": |- - Required. Destination schema to store table. 
- "destination_table": - "description": |- - Required. Destination table name. The pipeline fails if a table with that name already exists. - "source_url": - "description": |- - Required. Report URL in the source system. - "table_configuration": - "description": |- - Configuration settings to control the ingestion of tables. These settings override the table_configuration defined in the IngestionPipelineDefinition object. -github.com/databricks/databricks-sdk-go/service/pipelines.RestartWindow: - "days_of_week": - "description": |- - Days of week in which the restart is allowed to happen (within a five-hour window starting at start_hour). - If not specified all days of the week will be used. - "start_hour": - "description": |- - An integer between 0 and 23 denoting the start hour for the restart window in the 24-hour day. - Continuous pipeline restart is triggered only within a five-hour window starting at this hour. - "time_zone_id": - "description": |- - Time zone id of restart window. See https://docs.databricks.com/sql/language-manual/sql-ref-syntax-aux-conf-mgmt-set-timezone.html for details. - If not specified, UTC will be used. -github.com/databricks/databricks-sdk-go/service/pipelines.RunAs: - "_": - "description": |- - Write-only setting, available only in Create/Update calls. Specifies the user or service principal that the pipeline runs as. If not specified, the pipeline runs as the user who created the pipeline. - - Only `user_name` or `service_principal_name` can be specified. If both are specified, an error is thrown. - "service_principal_name": - "description": |- - Application ID of an active service principal. Setting this field requires the `servicePrincipal/user` role. - "user_name": - "description": |- - The email of an active workspace user. Users can only set this field to their own email. -github.com/databricks/databricks-sdk-go/service/pipelines.SchemaSpec: - "destination_catalog": - "description": |- - Required. 
Destination catalog to store tables. - "destination_schema": - "description": |- - Required. Destination schema to store tables in. Tables with the same name as the source tables are created in this destination schema. The pipeline fails If a table with the same name already exists. - "source_catalog": - "description": |- - The source catalog name. Might be optional depending on the type of source. - "source_schema": - "description": |- - Required. Schema name in the source database. - "table_configuration": - "description": |- - Configuration settings to control the ingestion of tables. These settings are applied to all tables in this schema and override the table_configuration defined in the IngestionPipelineDefinition object. -github.com/databricks/databricks-sdk-go/service/pipelines.SourceCatalogConfig: - "_": - "description": |- - SourceCatalogConfig contains catalog-level custom configuration parameters for each source - "postgres": - "description": |- - Postgres-specific catalog-level configuration parameters - "source_catalog": - "description": |- - Source catalog name -github.com/databricks/databricks-sdk-go/service/pipelines.SourceConfig: - "catalog": - "description": |- - Catalog-level source configuration parameters -github.com/databricks/databricks-sdk-go/service/pipelines.TableSpec: - "destination_catalog": - "description": |- - Required. Destination catalog to store table. - "destination_schema": - "description": |- - Required. Destination schema to store table. - "destination_table": - "description": |- - Optional. Destination table name. The pipeline fails if a table with that name already exists. If not set, the source table name is used. - "source_catalog": - "description": |- - Source catalog name. Might be optional depending on the type of source. - "source_schema": - "description": |- - Schema name in the source database. Might be optional depending on the type of source. - "source_table": - "description": |- - Required. 
Table name in the source database. - "table_configuration": - "description": |- - Configuration settings to control the ingestion of tables. These settings override the table_configuration defined in the IngestionPipelineDefinition object and the SchemaSpec. -github.com/databricks/databricks-sdk-go/service/pipelines.TableSpecificConfig: - "auto_full_refresh_policy": - "description": |- - (Optional, Mutable) Policy for auto full refresh, if enabled pipeline will automatically try - to fix issues by doing a full refresh on the table in the retry run. auto_full_refresh_policy - in table configuration will override the above level auto_full_refresh_policy. - For example, - { - "auto_full_refresh_policy": { - "enabled": true, - "min_interval_hours": 23, - } - } - If unspecified, auto full refresh is disabled. - "exclude_columns": - "description": |- - A list of column names to be excluded for the ingestion. - When not specified, include_columns fully controls what columns to be ingested. - When specified, all other columns including future ones will be automatically included for ingestion. - This field in mutually exclusive with `include_columns`. - "include_columns": - "description": |- - A list of column names to be included for the ingestion. - When not specified, all columns except ones in exclude_columns will be included. Future - columns will be automatically included. - When specified, all other future columns will be automatically excluded from ingestion. - This field in mutually exclusive with `exclude_columns`. - "primary_keys": - "description": |- - The primary key of the table used to apply changes. - "query_based_connector_config": - "description": |- - Configurations that are only applicable for query-based ingestion connectors. - "x-databricks-preview": |- - PRIVATE - "row_filter": - "description": |- - (Optional, Immutable) The row filter condition to be applied to the table. - It must not contain the WHERE keyword, only the actual filter condition. 
- It must be in DBSQL format. - "x-databricks-preview": |- - PRIVATE - "salesforce_include_formula_fields": - "description": |- - If true, formula fields defined in the table are included in the ingestion. This setting is only valid for the Salesforce connector - "x-databricks-preview": |- - PRIVATE - "scd_type": - "description": |- - The SCD type to use to ingest the table. - "x-databricks-preview": |- - PRIVATE - "sequence_by": - "description": |- - The column names specifying the logical order of events in the source data. Spark Declarative Pipelines uses this sequencing to handle change events that arrive out of order. - "workday_report_parameters": - "description": |- - (Optional) Additional custom parameters for Workday Report - "x-databricks-preview": |- - PRIVATE -github.com/databricks/databricks-sdk-go/service/pipelines.TableSpecificConfigScdType: - "_": - "description": |- - The SCD type to use to ingest the table. - "enum": - - |- - SCD_TYPE_1 - - |- - SCD_TYPE_2 - - |- - APPEND_ONLY -github.com/databricks/databricks-sdk-go/service/postgres.EndpointGroupSpec: - "enable_readable_secondaries": - "description": |- - Whether to allow read-only connections to read-write endpoints. Only relevant for read-write endpoints where - size.max > 1. - "max": - "description": |- - The maximum number of computes in the endpoint group. Currently, this must be equal to min. Set to 1 for single - compute endpoints, to disable HA. To manually suspend all computes in an endpoint group, set disabled to - true on the EndpointSpec. - "min": - "description": |- - The minimum number of computes in the endpoint group. Currently, this must be equal to max. This must be greater - than or equal to 1. -github.com/databricks/databricks-sdk-go/service/postgres.EndpointSettings: - "_": - "description": |- - A collection of settings for a compute endpoint. - "pg_settings": - "description": |- - A raw representation of Postgres settings. 
-github.com/databricks/databricks-sdk-go/service/postgres.EndpointType: - "_": - "description": |- - The compute endpoint type. Either `read_write` or `read_only`. - "enum": - - |- - ENDPOINT_TYPE_READ_WRITE - - |- - ENDPOINT_TYPE_READ_ONLY -github.com/databricks/databricks-sdk-go/service/postgres.ProjectCustomTag: - "key": - "description": |- - The key of the custom tag. - "value": - "description": |- - The value of the custom tag. -github.com/databricks/databricks-sdk-go/service/postgres.ProjectDefaultEndpointSettings: - "_": - "description": |- - A collection of settings for a compute endpoint. - "autoscaling_limit_max_cu": - "description": |- - The maximum number of Compute Units. Minimum value is 0.5. - "autoscaling_limit_min_cu": - "description": |- - The minimum number of Compute Units. Minimum value is 0.5. - "no_suspension": - "description": |- - When set to true, explicitly disables automatic suspension (never suspend). - Should be set to true when provided. - "pg_settings": - "description": |- - A raw representation of Postgres settings. - "suspend_timeout_duration": - "description": |- - Duration of inactivity after which the compute endpoint is automatically suspended. - If specified should be between 60s and 604800s (1 minute to 1 week). -github.com/databricks/databricks-sdk-go/service/serving.Ai21LabsConfig: - "ai21labs_api_key": - "description": |- - The Databricks secret key reference for an AI21 Labs API key. If you - prefer to paste your API key directly, see `ai21labs_api_key_plaintext`. - You must provide an API key using one of the following fields: - `ai21labs_api_key` or `ai21labs_api_key_plaintext`. - "ai21labs_api_key_plaintext": - "description": |- - An AI21 Labs API key provided as a plaintext string. If you prefer to - reference your key using Databricks Secrets, see `ai21labs_api_key`. You - must provide an API key using one of the following fields: - `ai21labs_api_key` or `ai21labs_api_key_plaintext`. 
-github.com/databricks/databricks-sdk-go/service/serving.AiGatewayConfig: - "fallback_config": - "description": |- - Configuration for traffic fallback which auto fallbacks to other served entities if the request to a served - entity fails with certain error codes, to increase availability. - "guardrails": - "description": |- - Configuration for AI Guardrails to prevent unwanted data and unsafe data in requests and responses. - "inference_table_config": - "description": |- - Configuration for payload logging using inference tables. - Use these tables to monitor and audit data being sent to and received from model APIs and to improve model quality. - "rate_limits": - "description": |- - Configuration for rate limits which can be set to limit endpoint traffic. - "usage_tracking_config": - "description": |- - Configuration to enable usage tracking using system tables. - These tables allow you to monitor operational usage on endpoints and their associated costs. -github.com/databricks/databricks-sdk-go/service/serving.AiGatewayGuardrailParameters: - "invalid_keywords": - "description": |- - List of invalid keywords. - AI guardrail uses keyword or string matching to decide if the keyword exists in the request or response content. - "deprecation_message": |- - This field is deprecated - "pii": - "description": |- - Configuration for guardrail PII filter. - "safety": - "description": |- - Indicates whether the safety filter is enabled. - "valid_topics": - "description": |- - The list of allowed topics. - Given a chat request, this guardrail flags the request if its topic is not in the allowed topics. - "deprecation_message": |- - This field is deprecated -github.com/databricks/databricks-sdk-go/service/serving.AiGatewayGuardrailPiiBehavior: - "behavior": - "description": |- - Configuration for input guardrail filters. 
-github.com/databricks/databricks-sdk-go/service/serving.AiGatewayGuardrailPiiBehaviorBehavior: - "_": - "enum": - - |- - NONE - - |- - BLOCK - - |- - MASK -github.com/databricks/databricks-sdk-go/service/serving.AiGatewayGuardrails: - "input": - "description": |- - Configuration for input guardrail filters. - "output": - "description": |- - Configuration for output guardrail filters. -github.com/databricks/databricks-sdk-go/service/serving.AiGatewayInferenceTableConfig: - "catalog_name": - "description": |- - The name of the catalog in Unity Catalog. Required when enabling inference tables. - NOTE: On update, you have to disable inference table first in order to change the catalog name. - "enabled": - "description": |- - Indicates whether the inference table is enabled. - "schema_name": - "description": |- - The name of the schema in Unity Catalog. Required when enabling inference tables. - NOTE: On update, you have to disable inference table first in order to change the schema name. - "table_name_prefix": - "description": |- - The prefix of the table in Unity Catalog. - NOTE: On update, you have to disable inference table first in order to change the prefix name. -github.com/databricks/databricks-sdk-go/service/serving.AiGatewayRateLimit: - "calls": - "description": |- - Used to specify how many calls are allowed for a key within the renewal_period. - "key": - "description": |- - Key field for a rate limit. Currently, 'user', 'user_group, 'service_principal', and 'endpoint' are supported, - with 'endpoint' being the default if not specified. - "principal": - "description": |- - Principal field for a user, user group, or service principal to apply rate limiting to. Accepts a user email, group name, or service principal application ID. - "renewal_period": - "description": |- - Renewal period field for a rate limit. Currently, only 'minute' is supported. 
- "tokens": - "description": |- - Used to specify how many tokens are allowed for a key within the renewal_period. -github.com/databricks/databricks-sdk-go/service/serving.AiGatewayRateLimitKey: - "_": - "enum": - - |- - user - - |- - endpoint - - |- - user_group - - |- - service_principal -github.com/databricks/databricks-sdk-go/service/serving.AiGatewayRateLimitRenewalPeriod: - "_": - "enum": - - |- - minute -github.com/databricks/databricks-sdk-go/service/serving.AiGatewayUsageTrackingConfig: - "enabled": - "description": |- - Whether to enable usage tracking. -github.com/databricks/databricks-sdk-go/service/serving.AmazonBedrockConfig: - "aws_access_key_id": - "description": |- - The Databricks secret key reference for an AWS access key ID with - permissions to interact with Bedrock services. If you prefer to paste - your API key directly, see `aws_access_key_id_plaintext`. You must provide an API - key using one of the following fields: `aws_access_key_id` or - `aws_access_key_id_plaintext`. - "aws_access_key_id_plaintext": - "description": |- - An AWS access key ID with permissions to interact with Bedrock services - provided as a plaintext string. If you prefer to reference your key using - Databricks Secrets, see `aws_access_key_id`. You must provide an API key - using one of the following fields: `aws_access_key_id` or - `aws_access_key_id_plaintext`. - "aws_region": - "description": |- - The AWS region to use. Bedrock has to be enabled there. - "aws_secret_access_key": - "description": |- - The Databricks secret key reference for an AWS secret access key paired - with the access key ID, with permissions to interact with Bedrock - services. If you prefer to paste your API key directly, see - `aws_secret_access_key_plaintext`. You must provide an API key using one - of the following fields: `aws_secret_access_key` or - `aws_secret_access_key_plaintext`. 
- "aws_secret_access_key_plaintext": - "description": |- - An AWS secret access key paired with the access key ID, with permissions - to interact with Bedrock services provided as a plaintext string. If you - prefer to reference your key using Databricks Secrets, see - `aws_secret_access_key`. You must provide an API key using one of the - following fields: `aws_secret_access_key` or - `aws_secret_access_key_plaintext`. - "bedrock_provider": - "description": |- - The underlying provider in Amazon Bedrock. Supported values (case - insensitive) include: Anthropic, Cohere, AI21Labs, Amazon. - "instance_profile_arn": - "description": |- - ARN of the instance profile that the external model will use to access AWS resources. - You must authenticate using an instance profile or access keys. - If you prefer to authenticate using access keys, see `aws_access_key_id`, - `aws_access_key_id_plaintext`, `aws_secret_access_key` and `aws_secret_access_key_plaintext`. -github.com/databricks/databricks-sdk-go/service/serving.AmazonBedrockConfigBedrockProvider: - "_": - "enum": - - |- - anthropic - - |- - cohere - - |- - ai21labs - - |- - amazon -github.com/databricks/databricks-sdk-go/service/serving.AnthropicConfig: - "anthropic_api_key": - "description": |- - The Databricks secret key reference for an Anthropic API key. If you - prefer to paste your API key directly, see `anthropic_api_key_plaintext`. - You must provide an API key using one of the following fields: - `anthropic_api_key` or `anthropic_api_key_plaintext`. - "anthropic_api_key_plaintext": - "description": |- - The Anthropic API key provided as a plaintext string. If you prefer to - reference your key using Databricks Secrets, see `anthropic_api_key`. You - must provide an API key using one of the following fields: - `anthropic_api_key` or `anthropic_api_key_plaintext`. 
-github.com/databricks/databricks-sdk-go/service/serving.ApiKeyAuth: - "key": - "description": |- - The name of the API key parameter used for authentication. - "value": - "description": |- - The Databricks secret key reference for an API Key. - If you prefer to paste your token directly, see `value_plaintext`. - "value_plaintext": - "description": |- - The API Key provided as a plaintext string. If you prefer to reference your - token using Databricks Secrets, see `value`. -github.com/databricks/databricks-sdk-go/service/serving.AutoCaptureConfigInput: - "catalog_name": - "description": |- - The name of the catalog in Unity Catalog. NOTE: On update, you cannot change the catalog name if the inference table is already enabled. - "enabled": - "description": |- - Indicates whether the inference table is enabled. - "schema_name": - "description": |- - The name of the schema in Unity Catalog. NOTE: On update, you cannot change the schema name if the inference table is already enabled. - "table_name_prefix": - "description": |- - The prefix of the table in Unity Catalog. NOTE: On update, you cannot change the prefix name if the inference table is already enabled. -github.com/databricks/databricks-sdk-go/service/serving.BearerTokenAuth: - "token": - "description": |- - The Databricks secret key reference for a token. - If you prefer to paste your token directly, see `token_plaintext`. - "token_plaintext": - "description": |- - The token provided as a plaintext string. If you prefer to reference your - token using Databricks Secrets, see `token`. -github.com/databricks/databricks-sdk-go/service/serving.CohereConfig: - "cohere_api_base": - "description": |- - This is an optional field to provide a customized base URL for the Cohere - API. If left unspecified, the standard Cohere base URL is used. - "cohere_api_key": - "description": |- - The Databricks secret key reference for a Cohere API key. If you prefer - to paste your API key directly, see `cohere_api_key_plaintext`. 
You must - provide an API key using one of the following fields: `cohere_api_key` or - `cohere_api_key_plaintext`. - "cohere_api_key_plaintext": - "description": |- - The Cohere API key provided as a plaintext string. If you prefer to - reference your key using Databricks Secrets, see `cohere_api_key`. You - must provide an API key using one of the following fields: - `cohere_api_key` or `cohere_api_key_plaintext`. -github.com/databricks/databricks-sdk-go/service/serving.CustomProviderConfig: - "_": - "description": |- - Configs needed to create a custom provider model route. - "api_key_auth": - "description": |- - This is a field to provide API key authentication for the custom provider API. - You can only specify one authentication method. - "bearer_token_auth": - "description": |- - This is a field to provide bearer token authentication for the custom provider API. - You can only specify one authentication method. - "custom_provider_url": - "description": |- - This is a field to provide the URL of the custom provider API. -github.com/databricks/databricks-sdk-go/service/serving.DatabricksModelServingConfig: - "databricks_api_token": - "description": |- - The Databricks secret key reference for a Databricks API token that - corresponds to a user or service principal with Can Query access to the - model serving endpoint pointed to by this external model. If you prefer - to paste your API key directly, see `databricks_api_token_plaintext`. You - must provide an API key using one of the following fields: - `databricks_api_token` or `databricks_api_token_plaintext`. - "databricks_api_token_plaintext": - "description": |- - The Databricks API token that corresponds to a user or service principal - with Can Query access to the model serving endpoint pointed to by this - external model provided as a plaintext string. If you prefer to reference - your key using Databricks Secrets, see `databricks_api_token`. 
You must - provide an API key using one of the following fields: - `databricks_api_token` or `databricks_api_token_plaintext`. - "databricks_workspace_url": - "description": |- - The URL of the Databricks workspace containing the model serving endpoint - pointed to by this external model. -github.com/databricks/databricks-sdk-go/service/serving.EmailNotifications: - "on_update_failure": - "description": |- - A list of email addresses to be notified when an endpoint fails to update its configuration or state. - "on_update_success": - "description": |- - A list of email addresses to be notified when an endpoint successfully updates its configuration or state. -github.com/databricks/databricks-sdk-go/service/serving.EndpointCoreConfigInput: - "auto_capture_config": - "description": |- - Configuration for Inference Tables which automatically logs requests and responses to Unity Catalog. - Note: this field is deprecated for creating new provisioned throughput endpoints, - or updating existing provisioned throughput endpoints that never have inference table configured; - in these cases please use AI Gateway to manage inference tables. - "served_entities": - "description": |- - The list of served entities under the serving endpoint config. - "served_models": - "description": |- - (Deprecated, use served_entities instead) The list of served models under the serving endpoint config. - "traffic_config": - "description": |- - The traffic configuration associated with the serving endpoint config. -github.com/databricks/databricks-sdk-go/service/serving.EndpointTag: - "key": - "description": |- - Key field for a serving endpoint tag. - "value": - "description": |- - Optional value field for a serving endpoint tag. -github.com/databricks/databricks-sdk-go/service/serving.ExternalModel: - "ai21labs_config": - "description": |- - AI21Labs Config. Only required if the provider is 'ai21labs'. - "amazon_bedrock_config": - "description": |- - Amazon Bedrock Config. 
Only required if the provider is 'amazon-bedrock'. - "anthropic_config": - "description": |- - Anthropic Config. Only required if the provider is 'anthropic'. - "cohere_config": - "description": |- - Cohere Config. Only required if the provider is 'cohere'. - "custom_provider_config": - "description": |- - Custom Provider Config. Only required if the provider is 'custom'. - "databricks_model_serving_config": - "description": |- - Databricks Model Serving Config. Only required if the provider is 'databricks-model-serving'. - "google_cloud_vertex_ai_config": - "description": |- - Google Cloud Vertex AI Config. Only required if the provider is 'google-cloud-vertex-ai'. - "name": - "description": |- - The name of the external model. - "openai_config": - "description": |- - OpenAI Config. Only required if the provider is 'openai'. - "palm_config": - "description": |- - PaLM Config. Only required if the provider is 'palm'. - "provider": - "description": |- - The name of the provider for the external model. Currently, the supported providers are 'ai21labs', 'anthropic', 'amazon-bedrock', 'cohere', 'databricks-model-serving', 'google-cloud-vertex-ai', 'openai', 'palm', and 'custom'. - "task": - "description": |- - The task type of the external model. -github.com/databricks/databricks-sdk-go/service/serving.ExternalModelProvider: - "_": - "enum": - - |- - ai21labs - - |- - anthropic - - |- - amazon-bedrock - - |- - cohere - - |- - databricks-model-serving - - |- - google-cloud-vertex-ai - - |- - openai - - |- - palm - - |- - custom -github.com/databricks/databricks-sdk-go/service/serving.FallbackConfig: - "enabled": - "description": |- - Whether to enable traffic fallback. When a served entity in the serving endpoint returns specific error - codes (e.g. 500), the request will automatically be round-robin attempted with other served entities in the same - endpoint, following the order of served entity list, until a successful response is returned. 
- If all attempts fail, return the last response with the error code. -github.com/databricks/databricks-sdk-go/service/serving.GoogleCloudVertexAiConfig: - "private_key": - "description": |- - The Databricks secret key reference for a private key for the service - account which has access to the Google Cloud Vertex AI Service. See [Best - practices for managing service account keys]. If you prefer to paste your - API key directly, see `private_key_plaintext`. You must provide an API - key using one of the following fields: `private_key` or - `private_key_plaintext` - - [Best practices for managing service account keys]: https://cloud.google.com/iam/docs/best-practices-for-managing-service-account-keys - "private_key_plaintext": - "description": |- - The private key for the service account which has access to the Google - Cloud Vertex AI Service provided as a plaintext secret. See [Best - practices for managing service account keys]. If you prefer to reference - your key using Databricks Secrets, see `private_key`. You must provide an - API key using one of the following fields: `private_key` or - `private_key_plaintext`. - - [Best practices for managing service account keys]: https://cloud.google.com/iam/docs/best-practices-for-managing-service-account-keys - "project_id": - "description": |- - This is the Google Cloud project id that the service account is - associated with. - "region": - "description": |- - This is the region for the Google Cloud Vertex AI Service. See [supported - regions] for more details. Some models are only available in specific - regions. - - [supported regions]: https://cloud.google.com/vertex-ai/docs/general/locations -github.com/databricks/databricks-sdk-go/service/serving.OpenAiConfig: - "_": - "description": |- - Configs needed to create an OpenAI model route. - "microsoft_entra_client_id": - "description": |- - This field is only required for Azure AD OpenAI and is the Microsoft - Entra Client ID. 
- "microsoft_entra_client_secret": - "description": |- - The Databricks secret key reference for a client secret used for - Microsoft Entra ID authentication. If you prefer to paste your client - secret directly, see `microsoft_entra_client_secret_plaintext`. You must - provide an API key using one of the following fields: - `microsoft_entra_client_secret` or - `microsoft_entra_client_secret_plaintext`. - "microsoft_entra_client_secret_plaintext": - "description": |- - The client secret used for Microsoft Entra ID authentication provided as - a plaintext string. If you prefer to reference your key using Databricks - Secrets, see `microsoft_entra_client_secret`. You must provide an API key - using one of the following fields: `microsoft_entra_client_secret` or - `microsoft_entra_client_secret_plaintext`. - "microsoft_entra_tenant_id": - "description": |- - This field is only required for Azure AD OpenAI and is the Microsoft - Entra Tenant ID. - "openai_api_base": - "description": |- - This is a field to provide a customized base URl for the OpenAI API. For - Azure OpenAI, this field is required, and is the base URL for the Azure - OpenAI API service provided by Azure. For other OpenAI API types, this - field is optional, and if left unspecified, the standard OpenAI base URL - is used. - "openai_api_key": - "description": |- - The Databricks secret key reference for an OpenAI API key using the - OpenAI or Azure service. If you prefer to paste your API key directly, - see `openai_api_key_plaintext`. You must provide an API key using one of - the following fields: `openai_api_key` or `openai_api_key_plaintext`. - "openai_api_key_plaintext": - "description": |- - The OpenAI API key using the OpenAI or Azure service provided as a - plaintext string. If you prefer to reference your key using Databricks - Secrets, see `openai_api_key`. You must provide an API key using one of - the following fields: `openai_api_key` or `openai_api_key_plaintext`. 
- "openai_api_type": - "description": |- - This is an optional field to specify the type of OpenAI API to use. For - Azure OpenAI, this field is required, and adjust this parameter to - represent the preferred security access validation protocol. For access - token validation, use azure. For authentication using Azure Active - Directory (Azure AD) use, azuread. - "openai_api_version": - "description": |- - This is an optional field to specify the OpenAI API version. For Azure - OpenAI, this field is required, and is the version of the Azure OpenAI - service to utilize, specified by a date. - "openai_deployment_name": - "description": |- - This field is only required for Azure OpenAI and is the name of the - deployment resource for the Azure OpenAI service. - "openai_organization": - "description": |- - This is an optional field to specify the organization in OpenAI or Azure - OpenAI. -github.com/databricks/databricks-sdk-go/service/serving.PaLmConfig: - "palm_api_key": - "description": |- - The Databricks secret key reference for a PaLM API key. If you prefer to - paste your API key directly, see `palm_api_key_plaintext`. You must - provide an API key using one of the following fields: `palm_api_key` or - `palm_api_key_plaintext`. - "palm_api_key_plaintext": - "description": |- - The PaLM API key provided as a plaintext string. If you prefer to - reference your key using Databricks Secrets, see `palm_api_key`. You must - provide an API key using one of the following fields: `palm_api_key` or - `palm_api_key_plaintext`. -github.com/databricks/databricks-sdk-go/service/serving.RateLimit: - "calls": - "description": |- - Used to specify how many calls are allowed for a key within the renewal_period. - "key": - "description": |- - Key field for a serving endpoint rate limit. Currently, only 'user' and 'endpoint' are supported, with 'endpoint' being the default if not specified. 
- "renewal_period": - "description": |- - Renewal period field for a serving endpoint rate limit. Currently, only 'minute' is supported. -github.com/databricks/databricks-sdk-go/service/serving.RateLimitKey: - "_": - "enum": - - |- - user - - |- - endpoint -github.com/databricks/databricks-sdk-go/service/serving.RateLimitRenewalPeriod: - "_": - "enum": - - |- - minute -github.com/databricks/databricks-sdk-go/service/serving.Route: - "served_entity_name": {} - "served_model_name": - "description": |- - The name of the served model this route configures traffic for. - "traffic_percentage": - "description": |- - The percentage of endpoint traffic to send to this route. It must be an integer between 0 and 100 inclusive. -github.com/databricks/databricks-sdk-go/service/serving.ServedEntityInput: - "burst_scaling_enabled": - "description": |- - Whether burst scaling is enabled. When enabled (default), the endpoint can automatically - scale up beyond provisioned capacity to handle traffic spikes. When disabled, the endpoint - maintains fixed capacity at provisioned_model_units. - "entity_name": - "description": |- - The name of the entity to be served. The entity may be a model in the Databricks Model Registry, a model in the Unity Catalog (UC), or a function of type FEATURE_SPEC in the UC. If it is a UC object, the full name of the object should be given in the form of **catalog_name.schema_name.model_name**. - "entity_version": {} - "environment_vars": - "description": |- - An object containing a set of optional, user-specified environment variable key-value pairs used for serving this entity. Note: this is an experimental feature and subject to change. Example entity environment variables that refer to Databricks secrets: `{"OPENAI_API_KEY": "{{secrets/my_scope/my_key}}", "DATABRICKS_TOKEN": "{{secrets/my_scope2/my_key2}}"}` - "external_model": - "description": |- - The external model to be served. 
NOTE: Only one of external_model and (entity_name, entity_version, workload_size, workload_type, and scale_to_zero_enabled) can be specified with the latter set being used for custom model serving for a Databricks registered model. For an existing endpoint with external_model, it cannot be updated to an endpoint without external_model. If the endpoint is created without external_model, users cannot update it to add external_model later. The task type of all external models within an endpoint must be the same. - "instance_profile_arn": - "description": |- - ARN of the instance profile that the served entity uses to access AWS resources. - "max_provisioned_concurrency": - "description": |- - The maximum provisioned concurrency that the endpoint can scale up to. Do not use if workload_size is specified. - "max_provisioned_throughput": - "description": |- - The maximum tokens per second that the endpoint can scale up to. - "min_provisioned_concurrency": - "description": |- - The minimum provisioned concurrency that the endpoint can scale down to. Do not use if workload_size is specified. - "min_provisioned_throughput": - "description": |- - The minimum tokens per second that the endpoint can scale down to. - "name": - "description": |- - The name of a served entity. It must be unique across an endpoint. A served entity name can consist of alphanumeric characters, dashes, and underscores. If not specified for an external model, this field defaults to external_model.name, with '.' and ':' replaced with '-', and if not specified for other entities, it defaults to entity_name-entity_version. - "provisioned_model_units": - "description": |- - The number of model units provisioned. - "scale_to_zero_enabled": - "description": |- - Whether the compute resources for the served entity should scale down to zero. - "workload_size": - "description": |- - The workload size of the served entity. 
The workload size corresponds to a range of provisioned concurrency that the compute autoscales between. A single unit of provisioned concurrency can process one request at a time. Valid workload sizes are "Small" (4 - 4 provisioned concurrency), "Medium" (8 - 16 provisioned concurrency), and "Large" (16 - 64 provisioned concurrency). Additional custom workload sizes can also be used when available in the workspace. If scale-to-zero is enabled, the lower bound of the provisioned concurrency for each workload size is 0. Do not use if min_provisioned_concurrency and max_provisioned_concurrency are specified. - "workload_type": - "description": |- - The workload type of the served entity. The workload type selects which type of compute to use in the endpoint. The default value for this parameter is "CPU". For deep learning workloads, GPU acceleration is available by selecting workload types like GPU_SMALL and others. See the available [GPU types](https://docs.databricks.com/en/machine-learning/model-serving/create-manage-serving-endpoints.html#gpu-workload-types). -github.com/databricks/databricks-sdk-go/service/serving.ServedModelInput: - "burst_scaling_enabled": - "description": |- - Whether burst scaling is enabled. When enabled (default), the endpoint can automatically - scale up beyond provisioned capacity to handle traffic spikes. When disabled, the endpoint - maintains fixed capacity at provisioned_model_units. - "environment_vars": - "description": |- - An object containing a set of optional, user-specified environment variable key-value pairs used for serving this entity. Note: this is an experimental feature and subject to change. Example entity environment variables that refer to Databricks secrets: `{"OPENAI_API_KEY": "{{secrets/my_scope/my_key}}", "DATABRICKS_TOKEN": "{{secrets/my_scope2/my_key2}}"}` - "instance_profile_arn": - "description": |- - ARN of the instance profile that the served entity uses to access AWS resources. 
- "max_provisioned_concurrency": - "description": |- - The maximum provisioned concurrency that the endpoint can scale up to. Do not use if workload_size is specified. - "max_provisioned_throughput": - "description": |- - The maximum tokens per second that the endpoint can scale up to. - "min_provisioned_concurrency": - "description": |- - The minimum provisioned concurrency that the endpoint can scale down to. Do not use if workload_size is specified. - "min_provisioned_throughput": - "description": |- - The minimum tokens per second that the endpoint can scale down to. - "model_name": {} - "model_version": {} - "name": - "description": |- - The name of a served entity. It must be unique across an endpoint. A served entity name can consist of alphanumeric characters, dashes, and underscores. If not specified for an external model, this field defaults to external_model.name, with '.' and ':' replaced with '-', and if not specified for other entities, it defaults to entity_name-entity_version. - "provisioned_model_units": - "description": |- - The number of model units provisioned. - "scale_to_zero_enabled": - "description": |- - Whether the compute resources for the served entity should scale down to zero. - "workload_size": - "description": |- - The workload size of the served entity. The workload size corresponds to a range of provisioned concurrency that the compute autoscales between. A single unit of provisioned concurrency can process one request at a time. Valid workload sizes are "Small" (4 - 4 provisioned concurrency), "Medium" (8 - 16 provisioned concurrency), and "Large" (16 - 64 provisioned concurrency). Additional custom workload sizes can also be used when available in the workspace. If scale-to-zero is enabled, the lower bound of the provisioned concurrency for each workload size is 0. Do not use if min_provisioned_concurrency and max_provisioned_concurrency are specified. 
- "workload_type": - "description": |- - The workload type of the served entity. The workload type selects which type of compute to use in the endpoint. The default value for this parameter is "CPU". For deep learning workloads, GPU acceleration is available by selecting workload types like GPU_SMALL and others. See the available [GPU types](https://docs.databricks.com/en/machine-learning/model-serving/create-manage-serving-endpoints.html#gpu-workload-types). -github.com/databricks/databricks-sdk-go/service/serving.ServedModelInputWorkloadType: - "_": - "description": |- - Please keep this in sync with with workload types in InferenceEndpointEntities.scala - "enum": - - |- - CPU - - |- - GPU_MEDIUM - - |- - GPU_SMALL - - |- - GPU_LARGE - - |- - MULTIGPU_MEDIUM -github.com/databricks/databricks-sdk-go/service/serving.ServingModelWorkloadType: - "_": - "description": |- - Please keep this in sync with with workload types in InferenceEndpointEntities.scala - "enum": - - |- - CPU - - |- - GPU_MEDIUM - - |- - GPU_SMALL - - |- - GPU_LARGE - - |- - MULTIGPU_MEDIUM -github.com/databricks/databricks-sdk-go/service/serving.TrafficConfig: - "routes": - "description": |- - The list of routes that define traffic to each served entity. 
-github.com/databricks/databricks-sdk-go/service/sql.Aggregation: - "_": - "enum": - - |- - SUM - - |- - COUNT - - |- - COUNT_DISTINCT - - |- - AVG - - |- - MEDIAN - - |- - MIN - - |- - MAX - - |- - STDDEV -github.com/databricks/databricks-sdk-go/service/sql.AlertEvaluationState: - "_": - "description": |- - UNSPECIFIED - default unspecify value for proto enum, do not use it in the code - UNKNOWN - alert not yet evaluated - TRIGGERED - alert is triggered - OK - alert is not triggered - ERROR - alert evaluation failed - "enum": - - |- - UNKNOWN - - |- - TRIGGERED - - |- - OK - - |- - ERROR -github.com/databricks/databricks-sdk-go/service/sql.AlertLifecycleState: - "_": - "enum": - - |- - ACTIVE - - |- - DELETED -github.com/databricks/databricks-sdk-go/service/sql.AlertV2Evaluation: - "comparison_operator": - "description": |- - Operator used for comparison in alert evaluation. - "empty_result_state": - "description": |- - Alert state if result is empty. Please avoid setting this field to be `UNKNOWN` because `UNKNOWN` state is planned to be deprecated. - "last_evaluated_at": - "description": |- - Timestamp of the last evaluation. - "x-databricks-field-behaviors_output_only": |- - true - "notification": - "description": |- - User or Notification Destination to notify when alert is triggered. - "source": - "description": |- - Source column from result to use to evaluate alert - "state": - "description": |- - Latest state of alert evaluation. - "x-databricks-field-behaviors_output_only": |- - true - "threshold": - "description": |- - Threshold to user for alert evaluation, can be a column or a value. -github.com/databricks/databricks-sdk-go/service/sql.AlertV2Notification: - "notify_on_ok": - "description": |- - Whether to notify alert subscribers when alert returns back to normal. - "retrigger_seconds": - "description": |- - Number of seconds an alert waits after being triggered before it is allowed to send another notification. 
- If set to 0 or omitted, the alert will not send any further notifications after the first trigger - Setting this value to 1 allows the alert to send a notification on every evaluation where the condition is met, effectively making it always retrigger for notification purposes. - "subscriptions": {} -github.com/databricks/databricks-sdk-go/service/sql.AlertV2Operand: - "column": {} - "value": {} -github.com/databricks/databricks-sdk-go/service/sql.AlertV2OperandColumn: - "aggregation": - "description": |- - If not set, the behavior is equivalent to using `First row` in the UI. - "display": {} - "name": {} -github.com/databricks/databricks-sdk-go/service/sql.AlertV2OperandValue: - "bool_value": {} - "double_value": {} - "string_value": {} -github.com/databricks/databricks-sdk-go/service/sql.AlertV2RunAs: - "service_principal_name": - "description": |- - Application ID of an active service principal. Setting this field requires the `servicePrincipal/user` role. - "user_name": - "description": |- - The email of an active workspace user. Can only set this field to their own email. -github.com/databricks/databricks-sdk-go/service/sql.AlertV2Subscription: - "destination_id": {} - "user_email": {} -github.com/databricks/databricks-sdk-go/service/sql.Channel: - "_": - "description": |- - Configures the channel name and DBSQL version of the warehouse. CHANNEL_NAME_CUSTOM should be chosen only when `dbsql_version` is specified. 
- "dbsql_version": {} - "name": {} -github.com/databricks/databricks-sdk-go/service/sql.ChannelName: - "_": - "enum": - - |- - CHANNEL_NAME_PREVIEW - - |- - CHANNEL_NAME_CURRENT - - |- - CHANNEL_NAME_PREVIOUS - - |- - CHANNEL_NAME_CUSTOM -github.com/databricks/databricks-sdk-go/service/sql.ComparisonOperator: - "_": - "enum": - - |- - LESS_THAN - - |- - GREATER_THAN - - |- - EQUAL - - |- - NOT_EQUAL - - |- - GREATER_THAN_OR_EQUAL - - |- - LESS_THAN_OR_EQUAL - - |- - IS_NULL - - |- - IS_NOT_NULL -github.com/databricks/databricks-sdk-go/service/sql.CreateWarehouseRequestWarehouseType: - "_": - "enum": - - |- - TYPE_UNSPECIFIED - - |- - CLASSIC - - |- - PRO -github.com/databricks/databricks-sdk-go/service/sql.CronSchedule: - "pause_status": - "description": |- - Indicate whether this schedule is paused or not. - "quartz_cron_schedule": - "description": |- - A cron expression using quartz syntax that specifies the schedule for this pipeline. - Should use the quartz format described here: http://www.quartz-scheduler.org/documentation/quartz-2.1.7/tutorials/tutorial-lesson-06.html - "timezone_id": - "description": |- - A Java timezone id. The schedule will be resolved using this timezone. - This will be combined with the quartz_cron_schedule to determine the schedule. - See https://docs.databricks.com/sql/language-manual/sql-ref-syntax-aux-conf-mgmt-set-timezone.html for details. -github.com/databricks/databricks-sdk-go/service/sql.EndpointTagPair: - "key": {} - "value": {} -github.com/databricks/databricks-sdk-go/service/sql.EndpointTags: - "custom_tags": {} -github.com/databricks/databricks-sdk-go/service/sql.SchedulePauseStatus: - "_": - "enum": - - |- - UNPAUSED - - |- - PAUSED -github.com/databricks/databricks-sdk-go/service/sql.SpotInstancePolicy: - "_": - "description": |- - EndpointSpotInstancePolicy configures whether the endpoint should use spot - instances. 
- - The breakdown of how the EndpointSpotInstancePolicy converts to per cloud - configurations is: - - +-------+--------------------------------------+--------------------------------+ - | Cloud | COST_OPTIMIZED | RELIABILITY_OPTIMIZED | - +-------+--------------------------------------+--------------------------------+ - | AWS | On Demand Driver with Spot Executors | On Demand Driver and - Executors | | AZURE | On Demand Driver and Executors | On Demand Driver - and Executors | - +-------+--------------------------------------+--------------------------------+ - - While including "spot" in the enum name may limit the the future - extensibility of this field because it limits this enum to denoting "spot or - not", this is the field that PM recommends after discussion with customers - per SC-48783. - "enum": - - |- - POLICY_UNSPECIFIED - - |- - COST_OPTIMIZED - - |- - RELIABILITY_OPTIMIZED -github.com/databricks/databricks-sdk-go/service/workspace.AzureKeyVaultSecretScopeMetadata: - "_": - "description": |- - The metadata of the Azure KeyVault for a secret scope of type `AZURE_KEYVAULT` - "dns_name": - "description": |- - The DNS of the KeyVault - "resource_id": - "description": |- - The resource id of the azure KeyVault that user wants to associate the scope with. -github.com/databricks/databricks-sdk-go/service/workspace.ScopeBackendType: - "_": - "description": |- - The types of secret scope backends in the Secret Manager. Azure KeyVault backed secret scopes - will be supported in a later release. 
- "enum": - - |- - DATABRICKS - - |- - AZURE_KEYVAULT diff --git a/bundle/internal/schema/annotations_openapi_overrides.yml b/bundle/internal/schema/annotations_openapi_overrides.yml deleted file mode 100644 index 4aaab347e0..0000000000 --- a/bundle/internal/schema/annotations_openapi_overrides.yml +++ /dev/null @@ -1,1144 +0,0 @@ -github.com/databricks/cli/bundle/config/resources.Alert: - "evaluation": - "description": |- - PLACEHOLDER - "file_path": - "description": |- - PLACEHOLDER - "lifecycle": - "description": |- - PLACEHOLDER - "permissions": - "description": |- - PLACEHOLDER - "schedule": - "description": |- - PLACEHOLDER -github.com/databricks/cli/bundle/config/resources.AlertPermissionLevel: - "_": - "enum": - - |- - CAN_EDIT - - |- - CAN_MANAGE - - |- - CAN_READ - - |- - CAN_RUN -github.com/databricks/cli/bundle/config/resources.App: - "app_status": - "description": |- - PLACEHOLDER - "budget_policy_id": - "description": |- - PLACEHOLDER - "compute_size": - "description": |- - PLACEHOLDER - "compute_status": - "description": |- - PLACEHOLDER - "config": - "description": |- - PLACEHOLDER - "effective_budget_policy_id": - "description": |- - PLACEHOLDER - "effective_usage_policy_id": - "description": |- - PLACEHOLDER - "git_source": - "description": |- - PLACEHOLDER - "lifecycle": - "description": |- - Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. 
- "oauth2_app_client_id": - "description": |- - PLACEHOLDER - "oauth2_app_integration_id": - "description": |- - PLACEHOLDER - "permissions": - "description": |- - PLACEHOLDER - "service_principal_client_id": - "description": |- - PLACEHOLDER - "service_principal_id": - "description": |- - PLACEHOLDER - "service_principal_name": - "description": |- - PLACEHOLDER - "source_code_path": - "description": |- - PLACEHOLDER - "usage_policy_id": - "description": |- - PLACEHOLDER - "user_api_scopes": - "description": |- - PLACEHOLDER -github.com/databricks/cli/bundle/config/resources.AppPermissionLevel: - "_": - "enum": - - |- - CAN_MANAGE - - |- - CAN_USE -github.com/databricks/cli/bundle/config/resources.Catalog: - "grants": - "description": |- - PLACEHOLDER - "lifecycle": - "description": |- - PLACEHOLDER -github.com/databricks/cli/bundle/config/resources.Cluster: - "_": - "markdown_description": |- - The cluster resource defines an [all-purpose cluster](/api/workspace/clusters/create). - "markdown_examples": |- - The following example creates a cluster named `my_cluster` and sets that as the cluster to use to run the notebook in `my_job`: - - ```yaml - bundle: - name: clusters - - resources: - clusters: - my_cluster: - num_workers: 2 - node_type_id: "i3.xlarge" - autoscale: - min_workers: 2 - max_workers: 7 - spark_version: "13.3.x-scala2.12" - spark_conf: - "spark.executor.memory": "2g" - - jobs: - my_job: - tasks: - - task_key: test_task - notebook_task: - notebook_path: "./src/my_notebook.py" - ``` - "data_security_mode": - "description": |- - PLACEHOLDER - "docker_image": - "description": |- - PLACEHOLDER - "kind": - "description": |- - PLACEHOLDER - "lifecycle": - "description": |- - Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. 
- "permissions": - "description": |- - PLACEHOLDER - "runtime_engine": - "description": |- - PLACEHOLDER - "workload_type": - "description": |- - PLACEHOLDER -github.com/databricks/cli/bundle/config/resources.ClusterPermissionLevel: - "_": - "enum": - - |- - CAN_MANAGE - - |- - CAN_RESTART - - |- - CAN_ATTACH_TO -github.com/databricks/cli/bundle/config/resources.Dashboard: - "_": - "markdown_description": |- - The dashboard resource allows you to manage [AI/BI dashboards](/api/workspace/lakeview/create) in a bundle. For information about AI/BI dashboards, see [_](/dashboards/index.md). - "markdown_examples": |- - The following example includes and deploys the sample __NYC Taxi Trip Analysis__ dashboard to the Databricks workspace. - - ``` yaml - resources: - dashboards: - nyc_taxi_trip_analysis: - display_name: "NYC Taxi Trip Analysis" - file_path: ../src/nyc_taxi_trip_analysis.lvdash.json - warehouse_id: ${var.warehouse_id} - ``` - If you use the UI to modify the dashboard, modifications made through the UI are not applied to the dashboard JSON file in the local bundle unless you explicitly update it using `bundle generate`. You can use the `--watch` option to continuously poll and retrieve changes to the dashboard. See [_](/dev-tools/cli/bundle-commands.md#generate). - - In addition, if you attempt to deploy a bundle that contains a dashboard JSON file that is different than the one in the remote workspace, an error will occur. To force the deploy and overwrite the dashboard in the remote workspace with the local one, use the `--force` option. See [_](/dev-tools/cli/bundle-commands.md#deploy). - "create_time": - "description": |- - The timestamp of when the dashboard was created. - "dashboard_id": - "description": |- - UUID identifying the dashboard. - "display_name": - "description": |- - The display name of the dashboard. - "embed_credentials": - "description": |- - PLACEHOLDER - "etag": - "description": |- - The etag for the dashboard. 
Can be optionally provided on updates to ensure that the dashboard - has not been modified since the last read. - This field is excluded in List Dashboards responses. - "file_path": - "description": |- - PLACEHOLDER - "lifecycle": - "description": |- - Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. - "lifecycle_state": - "description": |- - The state of the dashboard resource. Used for tracking trashed status. - "parent_path": - "description": |- - The workspace path of the folder containing the dashboard. Includes leading slash and no - trailing slash. - This field is excluded in List Dashboards responses. - "path": - "description": |- - The workspace path of the dashboard asset, including the file name. - Exported dashboards always have the file extension `.lvdash.json`. - This field is excluded in List Dashboards responses. - "permissions": - "description": |- - PLACEHOLDER - "serialized_dashboard": - "description": |- - The contents of the dashboard in serialized string form. - This field is excluded in List Dashboards responses. - Use the [get dashboard API](https://docs.databricks.com/api/workspace/lakeview/get) - to retrieve an example response, which includes the `serialized_dashboard` field. - This field provides the structure of the JSON string that represents the dashboard's - layout and components. - "update_time": - "description": |- - The timestamp of when the dashboard was last updated by the user. - This field is excluded in List Dashboards responses. - "warehouse_id": - "description": |- - The warehouse ID used to run the dashboard. 
-github.com/databricks/cli/bundle/config/resources.DashboardPermissionLevel: - "_": - "enum": - - |- - CAN_READ - - |- - CAN_RUN - - |- - CAN_EDIT - - |- - CAN_MANAGE -github.com/databricks/cli/bundle/config/resources.DatabaseCatalog: - "create_database_if_not_exists": - "description": |- - PLACEHOLDER - "lifecycle": - "description": |- - Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. - "uid": - "description": |- - PLACEHOLDER -github.com/databricks/cli/bundle/config/resources.DatabaseInstance: - "lifecycle": - "description": |- - Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. - "permissions": - "description": |- - PLACEHOLDER -github.com/databricks/cli/bundle/config/resources.DatabaseInstancePermissionLevel: - "_": - "enum": - - |- - CAN_CREATE - - |- - CAN_USE - - |- - CAN_MANAGE -github.com/databricks/cli/bundle/config/resources.DatabaseProjectPermissionLevel: - "_": - "enum": - - |- - CAN_USE - - |- - CAN_MANAGE -github.com/databricks/cli/bundle/config/resources.ExternalLocation: - "grants": - "description": |- - PLACEHOLDER - "lifecycle": - "description": |- - PLACEHOLDER -github.com/databricks/cli/bundle/config/resources.Job: - "_": - "markdown_description": |- - The job resource allows you to define [jobs and their corresponding tasks](/api/workspace/jobs/create) in your bundle. For information about jobs, see [_](/jobs/index.md). For a tutorial that uses a Databricks Asset Bundles template to create a job, see [_](/dev-tools/bundles/jobs-tutorial.md). 
- "markdown_examples": |- - The following example defines a job with the resource key `hello-job` with one notebook task: - - ```yaml - resources: - jobs: - hello-job: - name: hello-job - tasks: - - task_key: hello-task - notebook_task: - notebook_path: ./hello.py - ``` - - For information about defining job tasks and overriding job settings, see [_](/dev-tools/bundles/job-task-types.md), [_](/dev-tools/bundles/job-task-override.md), and [_](/dev-tools/bundles/cluster-override.md). - "health": - "description": |- - PLACEHOLDER - "lifecycle": - "description": |- - Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. - "permissions": - "description": |- - PLACEHOLDER - "run_as": - "description": |- - PLACEHOLDER -github.com/databricks/cli/bundle/config/resources.JobPermissionLevel: - "_": - "enum": - - |- - CAN_MANAGE - - |- - CAN_MANAGE_RUN - - |- - CAN_VIEW - - |- - IS_OWNER -github.com/databricks/cli/bundle/config/resources.MlflowExperiment: - "_": - "markdown_description": |- - The experiment resource allows you to define [MLflow experiments](/api/workspace/experiments/createexperiment) in a bundle. For information about MLflow experiments, see [_](/mlflow/experiments.md). - "markdown_examples": |- - The following example defines an experiment that all users can view: - - ```yaml - resources: - experiments: - experiment: - name: my_ml_experiment - permissions: - - level: CAN_READ - group_name: users - description: MLflow experiment used to track runs - ``` - "lifecycle": - "description": |- - Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. 
- "permissions": - "description": |- - PLACEHOLDER -github.com/databricks/cli/bundle/config/resources.MlflowExperimentPermissionLevel: - "_": - "enum": - - |- - CAN_MANAGE - - |- - CAN_EDIT - - |- - CAN_READ -github.com/databricks/cli/bundle/config/resources.MlflowModel: - "_": - "markdown_description": |- - The model resource allows you to define [legacy models](/api/workspace/modelregistry/createmodel) in bundles. Databricks recommends you use Unity Catalog [registered models](#registered-model) instead. - "lifecycle": - "description": |- - Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. - "permissions": - "description": |- - PLACEHOLDER -github.com/databricks/cli/bundle/config/resources.MlflowModelPermissionLevel: - "_": - "enum": - - |- - CAN_EDIT - - |- - CAN_MANAGE - - |- - CAN_MANAGE_STAGING_VERSIONS - - |- - CAN_MANAGE_PRODUCTION_VERSIONS - - |- - CAN_READ -github.com/databricks/cli/bundle/config/resources.ModelServingEndpoint: - "_": - "markdown_description": |- - The model_serving_endpoint resource allows you to define [model serving endpoints](/api/workspace/servingendpoints/create). See [_](/machine-learning/model-serving/manage-serving-endpoints.md). - "markdown_examples": |- - The following example defines a Unity Catalog model serving endpoint: - - ```yaml - resources: - model_serving_endpoints: - uc_model_serving_endpoint: - name: "uc-model-endpoint" - config: - served_entities: - - entity_name: "myCatalog.mySchema.my-ads-model" - entity_version: "10" - workload_size: "Small" - scale_to_zero_enabled: "true" - traffic_config: - routes: - - served_model_name: "my-ads-model-10" - traffic_percentage: "100" - tags: - - key: "team" - value: "data science" - ``` - "description": - "description": |- - PLACEHOLDER - "lifecycle": - "description": |- - Lifecycle is a struct that contains the lifecycle settings for a resource. 
It controls the behavior of the resource when it is deployed or destroyed. - "permissions": - "description": |- - PLACEHOLDER -github.com/databricks/cli/bundle/config/resources.ModelServingEndpointPermissionLevel: - "_": - "enum": - - |- - CAN_MANAGE - - |- - CAN_QUERY - - |- - CAN_VIEW -github.com/databricks/cli/bundle/config/resources.Pipeline: - "_": - "markdown_description": |- - The pipeline resource allows you to create Delta Live Tables [pipelines](/api/workspace/pipelines/create). For information about pipelines, see [_](/dlt/index.md). For a tutorial that uses the Databricks Asset Bundles template to create a pipeline, see [_](/dev-tools/bundles/pipelines-tutorial.md). - "markdown_examples": |- - The following example defines a pipeline with the resource key `hello-pipeline`: - - ```yaml - resources: - pipelines: - hello-pipeline: - name: hello-pipeline - clusters: - - label: default - num_workers: 1 - development: true - continuous: false - channel: CURRENT - edition: CORE - photon: false - libraries: - - notebook: - path: ./pipeline.py - ``` - "dry_run": - "description": |- - PLACEHOLDER - "lifecycle": - "description": |- - Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. - "permissions": - "description": |- - PLACEHOLDER - "run_as": - "description": |- - PLACEHOLDER - "trigger": - "deprecation_message": |- - Use continuous instead -github.com/databricks/cli/bundle/config/resources.PipelinePermissionLevel: - "_": - "enum": - - |- - CAN_MANAGE - - |- - IS_OWNER - - |- - CAN_RUN - - |- - CAN_VIEW -github.com/databricks/cli/bundle/config/resources.QualityMonitor: - "_": - "markdown_description": |- - The quality_monitor resource allows you to define a Unity Catalog [table monitor](/api/workspace/qualitymonitors/create). For information about monitors, see [_](/machine-learning/model-serving/monitor-diagnose-endpoints.md). 
- "markdown_examples": |- - The following example defines a quality monitor: - - ```yaml - resources: - quality_monitors: - my_quality_monitor: - table_name: dev.mlops_schema.predictions - output_schema_name: ${bundle.target}.mlops_schema - assets_dir: /Users/${workspace.current_user.userName}/databricks_lakehouse_monitoring - inference_log: - granularities: [1 day] - model_id_col: model_id - prediction_col: prediction - label_col: price - problem_type: PROBLEM_TYPE_REGRESSION - timestamp_col: timestamp - schedule: - quartz_cron_expression: 0 0 8 * * ? # Run Every day at 8am - timezone_id: UTC - ``` - "inference_log": - "description": |- - PLACEHOLDER - "lifecycle": - "description": |- - Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. - "table_name": - "description": |- - PLACEHOLDER -github.com/databricks/cli/bundle/config/resources.RegisteredModel: - "_": - "markdown_description": |- - The registered model resource allows you to define models in Unity Catalog. For information about Unity Catalog [registered models](/api/workspace/registeredmodels/create), see [_](/machine-learning/manage-model-lifecycle/index.md). 
- "markdown_examples": |- - The following example defines a registered model in Unity Catalog: - - ```yaml - resources: - registered_models: - model: - name: my_model - catalog_name: ${bundle.target} - schema_name: mlops_schema - comment: Registered model in Unity Catalog for ${bundle.target} deployment target - grants: - - privileges: - - EXECUTE - principal: account users - ``` - "aliases": - "description": |- - PLACEHOLDER - "browse_only": - "description": |- - PLACEHOLDER - "created_at": - "description": |- - PLACEHOLDER - "created_by": - "description": |- - PLACEHOLDER - "full_name": - "description": |- - PLACEHOLDER - "grants": - "description": |- - PLACEHOLDER - "lifecycle": - "description": |- - Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. - "metastore_id": - "description": |- - PLACEHOLDER - "owner": - "description": |- - PLACEHOLDER - "updated_at": - "description": |- - PLACEHOLDER - "updated_by": - "description": |- - PLACEHOLDER -github.com/databricks/cli/bundle/config/resources.Schema: - "_": - "markdown_description": |- - The schema resource type allows you to define Unity Catalog [schemas](/api/workspace/schemas/create) for tables and other assets in your workflows and pipelines created as part of a bundle. A schema, different from other resource types, has the following limitations: - - - The owner of a schema resource is always the deployment user, and cannot be changed. If `run_as` is specified in the bundle, it will be ignored by operations on the schema. - - Only fields supported by the corresponding [Schemas object create API](/api/workspace/schemas/create) are available for the schema resource. For example, `enable_predictive_optimization` is not supported as it is only available on the [update API](/api/workspace/schemas/update). 
- "markdown_examples": |- - The following example defines a pipeline with the resource key `my_pipeline` that creates a Unity Catalog schema with the key `my_schema` as the target: - - ```yaml - resources: - pipelines: - my_pipeline: - name: test-pipeline-{{.unique_id}} - libraries: - - notebook: - path: ./nb.sql - development: true - catalog: main - target: ${resources.schemas.my_schema.id} - - schemas: - my_schema: - name: test-schema-{{.unique_id}} - catalog_name: main - comment: This schema was created by DABs. - ``` - - A top-level grants mapping is not supported by Databricks Asset Bundles, so if you want to set grants for a schema, define the grants for the schema within the `schemas` mapping. For more information about grants, see [_](/data-governance/unity-catalog/manage-privileges/index.md#grant). - - The following example defines a Unity Catalog schema with grants: - - ```yaml - resources: - schemas: - my_schema: - name: test-schema - grants: - - principal: users - privileges: - - CAN_MANAGE - - principal: my_team - privileges: - - CAN_READ - catalog_name: main - ``` - "grants": - "description": |- - PLACEHOLDER - "lifecycle": - "description": |- - Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. - "properties": - "description": |- - PLACEHOLDER -github.com/databricks/cli/bundle/config/resources.SecretScopePermissionLevel: - "_": - "enum": - - |- - READ - - |- - WRITE - - |- - MANAGE -github.com/databricks/cli/bundle/config/resources.SqlWarehouse: - "enable_photon": - "description": |- - Configures whether the warehouse should use Photon optimized clusters. - - Defaults to true. - "lifecycle": - "description": |- - Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. 
- "permissions": - "description": |- - PLACEHOLDER - "spot_instance_policy": - "description": |- - PLACEHOLDER - "warehouse_type": - "description": |- - PLACEHOLDER -github.com/databricks/cli/bundle/config/resources.SqlWarehousePermissionLevel: - "_": - "enum": - - |- - CAN_MANAGE - - |- - CAN_USE - - |- - CAN_MONITOR - - |- - CAN_VIEW -github.com/databricks/cli/bundle/config/resources.SyncedDatabaseTable: - "lifecycle": - "description": |- - PLACEHOLDER -github.com/databricks/cli/bundle/config/resources.Volume: - "_": - "markdown_description": |- - The volume resource type allows you to define and create Unity Catalog [volumes](/api/workspace/volumes/create) as part of a bundle. When deploying a bundle with a volume defined, note that: - - - A volume cannot be referenced in the `artifact_path` for the bundle until it exists in the workspace. Hence, if you want to use Databricks Asset Bundles to create the volume, you must first define the volume in the bundle, deploy it to create the volume, then reference it in the `artifact_path` in subsequent deployments. - - - Volumes in the bundle are not prepended with the `dev_${workspace.current_user.short_name}` prefix when the deployment target has `mode: development` configured. However, you can manually configure this prefix. See [_](/dev-tools/bundles/deployment-modes.md#custom-presets). - "markdown_examples": |- - The following example creates a Unity Catalog volume with the key `my_volume`: - - ```yaml - resources: - volumes: - my_volume: - catalog_name: main - name: my_volume - schema_name: my_schema - ``` - - For an example bundle that runs a job that writes to a file in Unity Catalog volume, see the [bundle-examples GitHub repository](https://github.com/databricks/bundle-examples/tree/main/knowledge_base/write_from_job_to_volume). - "grants": - "description": |- - PLACEHOLDER - "lifecycle": - "description": |- - Lifecycle is a struct that contains the lifecycle settings for a resource. 
It controls the behavior of the resource when it is deployed or destroyed. - "volume_type": - "description": |- - PLACEHOLDER -github.com/databricks/databricks-sdk-go/service/apps.AppDeployment: - "create_time": - "description": |- - PLACEHOLDER - "creator": - "description": |- - PLACEHOLDER - "deployment_artifacts": - "description": |- - PLACEHOLDER - "deployment_id": - "description": |- - PLACEHOLDER - "mode": - "description": |- - PLACEHOLDER - "source_code_path": - "description": |- - PLACEHOLDER - "status": - "description": |- - PLACEHOLDER - "update_time": - "description": |- - PLACEHOLDER -github.com/databricks/databricks-sdk-go/service/apps.AppDeploymentArtifacts: - "source_code_path": - "description": |- - PLACEHOLDER -github.com/databricks/databricks-sdk-go/service/apps.AppDeploymentStatus: - "message": - "description": |- - PLACEHOLDER - "state": - "description": |- - PLACEHOLDER -github.com/databricks/databricks-sdk-go/service/apps.AppResource: - "app": - "description": |- - PLACEHOLDER - "database": - "description": |- - PLACEHOLDER - "experiment": - "description": |- - PLACEHOLDER - "genie_space": - "description": |- - PLACEHOLDER - "job": - "description": |- - PLACEHOLDER - "secret": - "description": |- - PLACEHOLDER - "serving_endpoint": - "description": |- - PLACEHOLDER - "sql_warehouse": - "description": |- - PLACEHOLDER - "uc_securable": - "description": |- - PLACEHOLDER -github.com/databricks/databricks-sdk-go/service/apps.AppResourceDatabase: - "database_name": - "description": |- - PLACEHOLDER - "instance_name": - "description": |- - PLACEHOLDER - "permission": - "description": |- - PLACEHOLDER -github.com/databricks/databricks-sdk-go/service/apps.AppResourceExperiment: - "experiment_id": - "description": |- - PLACEHOLDER - "permission": - "description": |- - PLACEHOLDER -github.com/databricks/databricks-sdk-go/service/apps.AppResourceGenieSpace: - "name": - "description": |- - PLACEHOLDER - "permission": - "description": |- - PLACEHOLDER - 
"space_id": - "description": |- - PLACEHOLDER -github.com/databricks/databricks-sdk-go/service/apps.AppResourceJob: - "id": - "description": |- - PLACEHOLDER - "permission": - "description": |- - PLACEHOLDER -github.com/databricks/databricks-sdk-go/service/apps.AppResourceSecret: - "key": - "description": |- - PLACEHOLDER - "permission": - "description": |- - PLACEHOLDER - "scope": - "description": |- - PLACEHOLDER -github.com/databricks/databricks-sdk-go/service/apps.AppResourceServingEndpoint: - "name": - "description": |- - PLACEHOLDER - "permission": - "description": |- - PLACEHOLDER -github.com/databricks/databricks-sdk-go/service/apps.AppResourceSqlWarehouse: - "id": - "description": |- - PLACEHOLDER - "permission": - "description": |- - PLACEHOLDER -github.com/databricks/databricks-sdk-go/service/apps.AppResourceUcSecurable: - "permission": - "description": |- - PLACEHOLDER - "securable_full_name": - "description": |- - PLACEHOLDER - "securable_type": - "description": |- - PLACEHOLDER -github.com/databricks/databricks-sdk-go/service/apps.ApplicationStatus: - "message": - "description": |- - PLACEHOLDER - "state": - "description": |- - PLACEHOLDER -github.com/databricks/databricks-sdk-go/service/apps.ComputeStatus: - "message": - "description": |- - PLACEHOLDER - "state": {} -github.com/databricks/databricks-sdk-go/service/catalog.AwsSqsQueue: - "managed_resource_id": - "description": |- - PLACEHOLDER - "queue_url": - "description": |- - PLACEHOLDER -github.com/databricks/databricks-sdk-go/service/catalog.AzureQueueStorage: - "managed_resource_id": - "description": |- - PLACEHOLDER - "queue_url": - "description": |- - PLACEHOLDER - "resource_group": - "description": |- - PLACEHOLDER - "subscription_id": - "description": |- - PLACEHOLDER -github.com/databricks/databricks-sdk-go/service/catalog.EncryptionDetails: - "sse_encryption_details": - "description": |- - PLACEHOLDER -github.com/databricks/databricks-sdk-go/service/catalog.FileEventQueue: - 
"managed_aqs": - "description": |- - PLACEHOLDER - "managed_pubsub": - "description": |- - PLACEHOLDER - "managed_sqs": - "description": |- - PLACEHOLDER - "provided_aqs": - "description": |- - PLACEHOLDER - "provided_pubsub": - "description": |- - PLACEHOLDER - "provided_sqs": - "description": |- - PLACEHOLDER -github.com/databricks/databricks-sdk-go/service/catalog.GcpPubsub: - "managed_resource_id": - "description": |- - PLACEHOLDER - "subscription_name": - "description": |- - PLACEHOLDER -github.com/databricks/databricks-sdk-go/service/catalog.MonitorInferenceLog: - "granularities": - "description": |- - Granularities for aggregating data into time windows based on their timestamp. Valid values are 5 minutes, 30 minutes, 1 hour, 1 day, n weeks, 1 month, or 1 year. -github.com/databricks/databricks-sdk-go/service/catalog.MonitorTimeSeries: - "granularities": - "description": |- - Granularities for aggregating data into time windows based on their timestamp. Valid values are 5 minutes, 30 minutes, 1 hour, 1 day, n weeks, 1 month, or 1 year. 
-github.com/databricks/databricks-sdk-go/service/catalog.RegisteredModelAlias: - "catalog_name": - "description": |- - PLACEHOLDER - "id": - "description": |- - PLACEHOLDER - "model_name": - "description": |- - PLACEHOLDER - "schema_name": - "description": |- - PLACEHOLDER -github.com/databricks/databricks-sdk-go/service/catalog.SseEncryptionDetails: - "algorithm": - "description": |- - PLACEHOLDER - "aws_kms_key_arn": - "description": |- - PLACEHOLDER -github.com/databricks/databricks-sdk-go/service/catalog.SseEncryptionDetailsAlgorithm: - "_": - "description": |- - SSE algorithm to use for encrypting S3 objects - "enum": - - |- - AWS_SSE_KMS - - |- - AWS_SSE_S3 -github.com/databricks/databricks-sdk-go/service/compute.AwsAttributes: - "availability": - "description": |- - PLACEHOLDER - "ebs_volume_type": - "description": |- - PLACEHOLDER -github.com/databricks/databricks-sdk-go/service/compute.AzureAttributes: - "availability": - "description": |- - PLACEHOLDER -github.com/databricks/databricks-sdk-go/service/compute.ClusterSpec: - "data_security_mode": - "description": |- - PLACEHOLDER - "docker_image": - "description": |- - PLACEHOLDER - "kind": - "description": |- - PLACEHOLDER - "runtime_engine": - "description": |- - PLACEHOLDER - "workload_type": - "description": |- - PLACEHOLDER -github.com/databricks/databricks-sdk-go/service/compute.DockerImage: - "basic_auth": - "description": |- - PLACEHOLDER -github.com/databricks/databricks-sdk-go/service/compute.Environment: - "dependencies": - "description": |- - List of pip dependencies, as supported by the version of pip in this environment. 
- "java_dependencies": - "description": |- - PLACEHOLDER -github.com/databricks/databricks-sdk-go/service/compute.GcpAttributes: - "availability": - "description": |- - PLACEHOLDER -github.com/databricks/databricks-sdk-go/service/compute.InitScriptInfo: - "abfss": - "description": |- - Contains the Azure Data Lake Storage destination path -github.com/databricks/databricks-sdk-go/service/compute.Kind: - "_": - "enum": - - |- - CLASSIC_PREVIEW -github.com/databricks/databricks-sdk-go/service/compute.LogAnalyticsInfo: - "log_analytics_primary_key": - "description": |- - The primary key for the Azure Log Analytics agent configuration - "log_analytics_workspace_id": - "description": |- - The workspace ID for the Azure Log Analytics agent configuration -github.com/databricks/databricks-sdk-go/service/database.SyncedTablePosition: - "delta_table_sync_info": - "description": |- - PLACEHOLDER -github.com/databricks/databricks-sdk-go/service/jobs.DashboardTask: - "dashboard_id": - "description": |- - PLACEHOLDER - "subscription": - "description": |- - PLACEHOLDER -github.com/databricks/databricks-sdk-go/service/jobs.GenAiComputeTask: - "compute": - "description": |- - PLACEHOLDER -github.com/databricks/databricks-sdk-go/service/jobs.GitSource: - "git_snapshot": - "description": |- - PLACEHOLDER - "sparse_checkout": - "description": |- - PLACEHOLDER -github.com/databricks/databricks-sdk-go/service/jobs.JobEnvironment: - "spec": - "description": |- - PLACEHOLDER -github.com/databricks/databricks-sdk-go/service/jobs.JobsHealthRule: - "metric": - "description": |- - PLACEHOLDER - "op": - "description": |- - PLACEHOLDER -github.com/databricks/databricks-sdk-go/service/jobs.JobsHealthRules: - "rules": - "description": |- - PLACEHOLDER -github.com/databricks/databricks-sdk-go/service/jobs.RunJobTask: - "python_named_params": - "description": |- - PLACEHOLDER -github.com/databricks/databricks-sdk-go/service/jobs.Subscription: - "subscribers": - "description": |- - PLACEHOLDER 
-github.com/databricks/databricks-sdk-go/service/jobs.SubscriptionSubscriber: - "destination_id": - "description": |- - PLACEHOLDER - "user_name": - "description": |- - PLACEHOLDER -github.com/databricks/databricks-sdk-go/service/jobs.Task: - "dbt_platform_task": - "description": |- - PLACEHOLDER - "gen_ai_compute_task": - "description": |- - PLACEHOLDER - "health": - "description": |- - PLACEHOLDER -github.com/databricks/databricks-sdk-go/service/jobs.TriggerSettings: - "model": - "description": |- - PLACEHOLDER - "table_update": - "description": |- - PLACEHOLDER -github.com/databricks/databricks-sdk-go/service/jobs.Webhook: - "id": - "description": |- - PLACEHOLDER -github.com/databricks/databricks-sdk-go/service/pipelines.CronTrigger: - "quartz_cron_schedule": - "description": |- - PLACEHOLDER - "timezone_id": - "description": |- - PLACEHOLDER -github.com/databricks/databricks-sdk-go/service/pipelines.IngestionPipelineDefinition: - "netsuite_jar_path": - "description": |- - PLACEHOLDER -github.com/databricks/databricks-sdk-go/service/pipelines.PipelineLibrary: - "whl": - "deprecation_message": |- - This field is deprecated -github.com/databricks/databricks-sdk-go/service/pipelines.PipelineTrigger: - "cron": - "description": |- - PLACEHOLDER - "manual": - "description": |- - PLACEHOLDER -github.com/databricks/databricks-sdk-go/service/pipelines.TableSpecificConfig: - "workday_report_parameters": - "description": |- - PLACEHOLDER -github.com/databricks/databricks-sdk-go/service/serving.Route: - "served_entity_name": - "description": |- - PLACEHOLDER -github.com/databricks/databricks-sdk-go/service/serving.ServedEntityInput: - "entity_version": - "description": |- - PLACEHOLDER -github.com/databricks/databricks-sdk-go/service/serving.ServedModelInput: - "model_name": - "description": |- - PLACEHOLDER - "model_version": - "description": |- - PLACEHOLDER -github.com/databricks/databricks-sdk-go/service/sql.AlertV2Notification: - "subscriptions": - "description": |- - 
PLACEHOLDER -github.com/databricks/databricks-sdk-go/service/sql.AlertV2Operand: - "column": - "description": |- - PLACEHOLDER - "value": - "description": |- - PLACEHOLDER -github.com/databricks/databricks-sdk-go/service/sql.AlertV2OperandColumn: - "aggregation": - "description": |- - PLACEHOLDER - "display": - "description": |- - PLACEHOLDER - "name": - "description": |- - PLACEHOLDER -github.com/databricks/databricks-sdk-go/service/sql.AlertV2OperandValue: - "bool_value": - "description": |- - PLACEHOLDER - "double_value": - "description": |- - PLACEHOLDER - "string_value": - "description": |- - PLACEHOLDER -github.com/databricks/databricks-sdk-go/service/sql.AlertV2Subscription: - "destination_id": - "description": |- - PLACEHOLDER - "user_email": - "description": |- - PLACEHOLDER -github.com/databricks/databricks-sdk-go/service/sql.Channel: - "dbsql_version": - "description": |- - PLACEHOLDER - "name": - "description": |- - PLACEHOLDER -github.com/databricks/databricks-sdk-go/service/sql.EndpointTagPair: - "key": - "description": |- - PLACEHOLDER - "value": - "description": |- - PLACEHOLDER -github.com/databricks/databricks-sdk-go/service/sql.EndpointTags: - "custom_tags": - "description": |- - PLACEHOLDER diff --git a/bundle/internal/schema/main.go b/bundle/internal/schema/main.go index 252f2ffb2d..37c6ac7fd0 100644 --- a/bundle/internal/schema/main.go +++ b/bundle/internal/schema/main.go @@ -202,24 +202,20 @@ func main() { func generateSchema(workdir, outputFile string, docsMode bool) { annotationsPath := filepath.Join(workdir, "annotations.yml") - annotationsOpenApiPath := filepath.Join(workdir, "annotations_openapi.yml") - annotationsOpenApiOverridesPath := filepath.Join(workdir, "annotations_openapi_overrides.yml") - // Input file, the databricks openapi spec. + // Parse the OpenAPI spec if available, for reading SDK type descriptions directly. 
+ var p *openapiParser inputFile := os.Getenv("DATABRICKS_OPENAPI_SPEC") if inputFile != "" { - p, err := newParser(inputFile) - if err != nil { - log.Fatal(err) - } - fmt.Printf("Writing OpenAPI annotations to %s\n", annotationsOpenApiPath) - err = p.extractAnnotations(reflect.TypeOf(config.Root{}), annotationsOpenApiPath, annotationsOpenApiOverridesPath) + var err error + p, err = newParser(inputFile) if err != nil { log.Fatal(err) } + fmt.Printf("Using OpenAPI spec from %s\n", inputFile) } - a, err := newAnnotationHandler([]string{annotationsOpenApiPath, annotationsOpenApiOverridesPath, annotationsPath}) + a, err := newAnnotationHandler(annotationsPath, p) if err != nil { log.Fatal(err) } diff --git a/bundle/internal/schema/main_test.go b/bundle/internal/schema/main_test.go index 1b655d2e16..f2db1862b6 100644 --- a/bundle/internal/schema/main_test.go +++ b/bundle/internal/schema/main_test.go @@ -40,25 +40,25 @@ func copyFile(src, dst string) error { return out.Close() } -// Checks whether descriptions are added for new config fields in the annotations.yml file +// Checks whether descriptions are added for new config fields in the annotations.yml file. +// This test requires the DATABRICKS_OPENAPI_SPEC environment variable to be set to +// determine which fields get descriptions from the OpenAPI spec vs which need manual entries. +// // If this test fails either manually add descriptions to the `annotations.yml` or do the following: -// 1. for fields described outside of CLI package fetch latest schema from the OpenAPI spec and add path to file to DATABRICKS_OPENAPI_SPEC env variable -// 2. run `make schema` from the repository root to add placeholder descriptions -// 2. replace all "PLACEHOLDER" values with the actual descriptions if possible -// 3. run `make schema` again to regenerate the schema with acutal descriptions +// 1. Run `make schema` from the repository root to add placeholder descriptions +// 2. 
Replace all "PLACEHOLDER" values with the actual descriptions if possible +// 3. Run `make schema` again to regenerate the schema with actual descriptions func TestRequiredAnnotationsForNewFields(t *testing.T) { + if os.Getenv("DATABRICKS_OPENAPI_SPEC") == "" { + t.Skip("DATABRICKS_OPENAPI_SPEC not set, skipping annotation completeness check") + } + workdir := t.TempDir() annotationsPath := path.Join(workdir, "annotations.yml") - annotationsOpenApiPath := path.Join(workdir, "annotations_openapi.yml") - annotationsOpenApiOverridesPath := path.Join(workdir, "annotations_openapi_overrides.yml") - // Copy existing annotation files from the same folder as this test + // Copy existing annotation file from the same folder as this test err := copyFile("annotations.yml", annotationsPath) assert.NoError(t, err) - err = copyFile("annotations_openapi.yml", annotationsOpenApiPath) - assert.NoError(t, err) - err = copyFile("annotations_openapi_overrides.yml", annotationsOpenApiOverridesPath) - assert.NoError(t, err) generateSchema(workdir, path.Join(t.TempDir(), "schema.json"), false) @@ -83,35 +83,36 @@ func TestRequiredAnnotationsForNewFields(t *testing.T) { assert.Empty(t, updatedFieldPaths, "Missing JSON-schema descriptions for new config fields in bundle/internal/schema/annotations.yml:\n%s", strings.Join(updatedFieldPaths, "\n")) } -// Checks whether types in annotation files are still present in Config type +// Checks whether types in the annotations file are still present in the Config type. 
func TestNoDetachedAnnotations(t *testing.T) { - files := []string{ - "annotations.yml", - "annotations_openapi.yml", - "annotations_openapi_overrides.yml", + bundlePaths := map[string]bool{} + annotations, err := getAnnotations("annotations.yml") + assert.NoError(t, err) + for k := range annotations { + bundlePaths[k] = false } - types := map[string]bool{} - for _, file := range files { - annotations, err := getAnnotations(file) - assert.NoError(t, err) - for k := range annotations { - types[k] = false - } - } + // Use the path mapping to convert Go type paths to bundle paths during the walk. + m := buildPathMapping() - _, err := jsonschema.FromType(reflect.TypeOf(config.Root{}), []func(reflect.Type, jsonschema.Schema) jsonschema.Schema{ + _, err = jsonschema.FromType(reflect.TypeOf(config.Root{}), []func(reflect.Type, jsonschema.Schema) jsonschema.Schema{ func(typ reflect.Type, s jsonschema.Schema) jsonschema.Schema { - delete(types, getPath(typ)) + typePath := getPath(typ) + // Match by bundle path (for entries using bundle path keys) + if bp, ok := m.typeToBundlePath[typePath]; ok { + delete(bundlePaths, bp) + } + // Also match by Go type path (for entries using Go type path keys) + delete(bundlePaths, typePath) return s }, }) assert.NoError(t, err) - for typ := range types { - t.Errorf("Type `%s` in annotations file is not found in `root.Config` type", typ) + for bp := range bundlePaths { + t.Errorf("Type `%s` in annotations file is not found in `root.Config` type", bp) } - assert.Empty(t, types, "Detached annotations found, regenerate schema and check for package path changes") + assert.Empty(t, bundlePaths, "Detached annotations found, regenerate schema and check for package path changes") } func getAnnotations(path string) (annotation.File, error) { @@ -124,26 +125,3 @@ func getAnnotations(path string) (annotation.File, error) { err = yaml.Unmarshal(b, &data) return data, err } - -func DisabledTestNoDuplicatedAnnotations(t *testing.T) { - // Check for 
duplicated annotations in annotation files - files := []string{ - "annotations_openapi_overrides.yml", - "annotations.yml", - } - - annotations := map[string]string{} - for _, file := range files { - annotationsFile, err := getAnnotations(file) - assert.NoError(t, err) - for typ, props := range annotationsFile { - for prop := range props { - key := typ + "_" + prop - if prevFile, ok := annotations[key]; ok { - t.Errorf("Annotation `%s` is duplicated in %s and %s", key, prevFile, file) - } - annotations[key] = file - } - } - } -} diff --git a/bundle/internal/schema/parser.go b/bundle/internal/schema/parser.go index d72524dc59..51c2a3c84c 100644 --- a/bundle/internal/schema/parser.go +++ b/bundle/internal/schema/parser.go @@ -1,7 +1,6 @@ package main import ( - "bytes" "encoding/json" "fmt" "os" @@ -10,9 +9,6 @@ import ( "slices" "strings" - "github.com/databricks/cli/bundle/internal/annotation" - "github.com/databricks/cli/libs/dyn/convert" - "github.com/databricks/cli/libs/dyn/yamlloader" "github.com/databricks/cli/libs/jsonschema" ) @@ -123,152 +119,3 @@ func isOutputOnly(s jsonschema.Schema) *bool { res := true return &res } - -// Use the OpenAPI spec to load descriptions for the given type. 
-func (p *openapiParser) extractAnnotations(typ reflect.Type, outputPath, overridesPath string) error { - annotations := annotation.File{} - overrides := annotation.File{} - - b, err := os.ReadFile(overridesPath) - if err != nil { - return err - } - overridesDyn, err := yamlloader.LoadYAML(overridesPath, bytes.NewBuffer(b)) - if err != nil { - return err - } - err = convert.ToTyped(&overrides, overridesDyn) - if err != nil { - return err - } - if overrides == nil { - overrides = annotation.File{} - } - - _, err = jsonschema.FromType(typ, []func(reflect.Type, jsonschema.Schema) jsonschema.Schema{ - func(typ reflect.Type, s jsonschema.Schema) jsonschema.Schema { - ref, ok := p.findRef(typ) - if !ok { - return s - } - - basePath := getPath(typ) - pkg := map[string]annotation.Descriptor{} - annotations[basePath] = pkg - preview := ref.Preview - if preview == "PUBLIC" { - preview = "" - } - outputOnly := isOutputOnly(ref) - if ref.Description != "" || ref.Enum != nil || ref.Deprecated || ref.DeprecationMessage != "" || preview != "" || outputOnly != nil { - if ref.Deprecated && ref.DeprecationMessage == "" { - ref.DeprecationMessage = "This field is deprecated" - } - - pkg[RootTypeKey] = annotation.Descriptor{ - Description: ref.Description, - Enum: ref.Enum, - DeprecationMessage: ref.DeprecationMessage, - Preview: preview, - OutputOnly: outputOnly, - } - } - - for k := range s.Properties { - if refProp, ok := ref.Properties[k]; ok { - preview = refProp.Preview - if preview == "PUBLIC" { - preview = "" - } - - if refProp.Deprecated && refProp.DeprecationMessage == "" { - refProp.DeprecationMessage = "This field is deprecated" - } - - description := refProp.Description - - // If the field doesn't have a description, try to find the referenced type - // and use its description. This handles cases where the field references - // a type that has a description but the field itself doesn't. 
- if description == "" && refProp.Reference != nil { - refPath := *refProp.Reference - refTypeName := strings.TrimPrefix(refPath, "#/components/schemas/") - if refType, ok := p.ref[refTypeName]; ok { - description = refType.Description - } - } - - pkg[k] = annotation.Descriptor{ - Description: description, - Enum: refProp.Enum, - Preview: preview, - DeprecationMessage: refProp.DeprecationMessage, - OutputOnly: isOutputOnly(*refProp), - } - if description == "" { - addEmptyOverride(k, basePath, overrides) - } - } else { - addEmptyOverride(k, basePath, overrides) - } - } - return s - }, - }) - if err != nil { - return err - } - - err = saveYamlWithStyle(overridesPath, overrides) - if err != nil { - return err - } - err = saveYamlWithStyle(outputPath, annotations) - if err != nil { - return err - } - err = prependCommentToFile(outputPath, "# This file is auto-generated. DO NOT EDIT.\n") - if err != nil { - return err - } - return nil -} - -func prependCommentToFile(outputPath, comment string) error { - b, err := os.ReadFile(outputPath) - if err != nil { - return err - } - f, err := os.OpenFile(outputPath, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0o644) - if err != nil { - return err - } - defer f.Close() - - _, err = f.WriteString(comment) - if err != nil { - return err - } - _, err = f.Write(b) - return err -} - -func addEmptyOverride(key, pkg string, overridesFile annotation.File) { - if overridesFile[pkg] == nil { - overridesFile[pkg] = map[string]annotation.Descriptor{} - } - - overrides := overridesFile[pkg] - if overrides[key].Description == "" { - overrides[key] = annotation.Descriptor{Description: annotation.Placeholder} - } - - a, ok := overrides[key] - if !ok { - a = annotation.Descriptor{} - } - if a.Description == "" { - a.Description = annotation.Placeholder - } - overrides[key] = a -} diff --git a/bundle/internal/schema/path_mapping.go b/bundle/internal/schema/path_mapping.go new file mode 100644 index 0000000000..4d50b973d8 --- /dev/null +++ 
b/bundle/internal/schema/path_mapping.go @@ -0,0 +1,167 @@ +package main + +import ( + "reflect" + "strings" + + "github.com/databricks/cli/bundle/config" +) + +// pathMapping provides bidirectional mapping between Go type paths +// (e.g., "github.com/databricks/cli/bundle/config.Bundle") and bundle paths +// (e.g., "bundle"). This allows annotations.yml to use human-readable bundle +// paths instead of Go import paths. +type pathMapping struct { + // Go type path -> bundle path (e.g., "github.com/.../config.Bundle" -> "bundle") + typeToBundlePath map[string]string + // bundle path -> Go type path + bundlePathToType map[string]string + // Track visited types to avoid infinite recursion. + visited map[string]bool +} + +// buildPathMapping walks config.Root via reflection to build mappings between +// Go type paths and bundle paths. It skips the "targets" and "environments" +// fields since they mirror the root structure. +func buildPathMapping() *pathMapping { + m := &pathMapping{ + typeToBundlePath: map[string]string{}, + bundlePathToType: map[string]string{}, + visited: map[string]bool{}, + } + + rootType := reflect.TypeOf(config.Root{}) + m.typeToBundlePath[getPath(rootType)] = "root" + m.bundlePathToType["root"] = getPath(rootType) + + m.walkType(rootType, "") + return m +} + +// walkType recursively walks a Go type and records the bundle path for each +// named struct type encountered. +func (m *pathMapping) walkType(typ reflect.Type, currentPath string) { + typ = derefPtr(typ) + + if typ.Kind() != reflect.Struct { + return + } + + typPath := getPath(typ) + if m.visited[typPath] { + return + } + m.visited[typPath] = true + + for i := range typ.NumField() { + field := typ.Field(i) + + // Skip unexported fields. + if !field.IsExported() { + continue + } + + // Anonymous (embedded) fields: walk into them at the current path + // level since their fields are promoted to the parent struct. 
+ if field.Anonymous { + m.walkType(field.Type, currentPath) + continue + } + + // Get the JSON field name. + jsonName := jsonFieldName(field) + if jsonName == "" || jsonName == "-" { + continue + } + + // For targets/environments, record the Target type mapping and + // walk it to pick up types unique to Target (like TargetVariable). + // The visited check prevents re-walking types already seen from Root. + if currentPath == "" && (jsonName == "targets" || jsonName == "environments") { + elemType := derefPtr(field.Type) + if elemType.Kind() == reflect.Map { + valType := derefPtr(elemType.Elem()) + if valType.Name() != "" { + targetPath := jsonName + ".*" + m.recordMapping(getPath(valType), targetPath) + m.walkType(valType, targetPath) + } + } + continue + } + + fieldPath := jsonName + if currentPath != "" { + fieldPath = currentPath + "." + jsonName + } + + fieldType := derefPtr(field.Type) + + switch fieldType.Kind() { + case reflect.Map: + // For map types like map[string]*resources.Job, the path includes "*" + // for the map value type. + elemType := derefPtr(fieldType.Elem()) + elemPath := fieldPath + ".*" + if elemType.Name() != "" { + m.recordMapping(getPath(elemType), elemPath) + } + if elemType.Kind() == reflect.Struct { + m.walkType(elemType, elemPath) + } + + case reflect.Slice: + elemType := derefPtr(fieldType.Elem()) + elemPath := fieldPath + ".*" + if elemType.Name() != "" { + m.recordMapping(getPath(elemType), elemPath) + } + if elemType.Kind() == reflect.Struct { + m.walkType(elemType, elemPath) + } + + case reflect.Struct: + if fieldType.Name() != "" { + m.recordMapping(getPath(fieldType), fieldPath) + } + m.walkType(fieldType, fieldPath) + + default: + // Record non-struct named types (e.g., string enums). + if fieldType.Name() != "" && fieldType.PkgPath() != "" { + m.recordMapping(getPath(fieldType), fieldPath) + } + } + } +} + +// recordMapping records a type-to-path mapping. 
If a type already has a mapping, +// the shorter (higher-level) path wins. +func (m *pathMapping) recordMapping(typePath, bundlePath string) { + if existing, ok := m.typeToBundlePath[typePath]; ok { + // Keep the shorter path as canonical. + if len(existing) <= len(bundlePath) { + return + } + } + m.typeToBundlePath[typePath] = bundlePath + m.bundlePathToType[bundlePath] = typePath +} + +// derefPtr dereferences pointer types to their base type. +func derefPtr(t reflect.Type) reflect.Type { + for t.Kind() == reflect.Pointer { + t = t.Elem() + } + return t +} + +// jsonFieldName extracts the JSON field name from a struct field's json tag. +func jsonFieldName(f reflect.StructField) string { + tag := f.Tag.Get("json") + if tag == "" { + return f.Name + } + parts := strings.Split(tag, ",") + return parts[0] +} diff --git a/bundle/internal/schema/path_mapping_test.go b/bundle/internal/schema/path_mapping_test.go new file mode 100644 index 0000000000..495ad18422 --- /dev/null +++ b/bundle/internal/schema/path_mapping_test.go @@ -0,0 +1,52 @@ +package main + +import ( + "fmt" + "slices" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestBuildPathMapping(t *testing.T) { + m := buildPathMapping() + + // Verify some well-known mappings + assert.Equal(t, "bundle", m.typeToBundlePath["github.com/databricks/cli/bundle/config.Bundle"]) + assert.Equal(t, "workspace", m.typeToBundlePath["github.com/databricks/cli/bundle/config.Workspace"]) + assert.Equal(t, "resources", m.typeToBundlePath["github.com/databricks/cli/bundle/config.Resources"]) + assert.Equal(t, "resources.jobs.*", m.typeToBundlePath["github.com/databricks/cli/bundle/config/resources.Job"]) + assert.Equal(t, "resources.pipelines.*", m.typeToBundlePath["github.com/databricks/cli/bundle/config/resources.Pipeline"]) + assert.Equal(t, "resources.clusters.*", m.typeToBundlePath["github.com/databricks/cli/bundle/config/resources.Cluster"]) + assert.Equal(t, "variables.*", 
m.typeToBundlePath["github.com/databricks/cli/bundle/config/variable.Variable"]) + + // Reverse mapping + assert.Equal(t, "github.com/databricks/cli/bundle/config.Bundle", m.bundlePathToType["bundle"]) + assert.Equal(t, "github.com/databricks/cli/bundle/config/resources.Job", m.bundlePathToType["resources.jobs.*"]) +} + +func TestPathMappingCoversAllAnnotatedTypes(t *testing.T) { + m := buildPathMapping() + + annotations, err := getAnnotations("annotations.yml") + assert.NoError(t, err) + + var unmapped []string + for typePath := range annotations { + // Skip keys that are already bundle paths (after conversion). + if _, ok := m.bundlePathToType[typePath]; ok { + continue + } + if _, ok := m.typeToBundlePath[typePath]; !ok { + unmapped = append(unmapped, typePath) + } + } + + if len(unmapped) > 0 { + slices.Sort(unmapped) + for _, u := range unmapped { + fmt.Printf("Unmapped: %s\n", u) + } + } + assert.Empty(t, unmapped, "All annotated types should have a bundle path mapping") +} diff --git a/bundle/schema/jsonschema.json b/bundle/schema/jsonschema.json index 4c78fa6683..a852e4e3d1 100644 --- a/bundle/schema/jsonschema.json +++ b/bundle/schema/jsonschema.json @@ -65,12 +65,15 @@ "type": "object", "properties": { "custom_description": { + "description": "Custom description for the alert. support mustache template.", "$ref": "#/$defs/string" }, "custom_summary": { + "description": "Custom summary for the alert. support mustache template.", "$ref": "#/$defs/string" }, "display_name": { + "description": "The display name of the alert.", "$ref": "#/$defs/string" }, "evaluation": { @@ -80,21 +83,26 @@ "$ref": "#/$defs/string" }, "lifecycle": { + "description": "Lifecycle is a struct that contains the lifecycle settings for a resource. 
It controls the behavior of the resource when it is deployed or destroyed.", "$ref": "#/$defs/github.com/databricks/cli/bundle/config/resources.Lifecycle" }, "parent_path": { + "description": "The workspace path of the folder containing the alert. Can only be set on create, and cannot be updated.", "$ref": "#/$defs/string" }, "permissions": { "$ref": "#/$defs/slice/github.com/databricks/cli/bundle/config/resources.AlertPermission" }, "query_text": { + "description": "Text of the query to be run.", "$ref": "#/$defs/string" }, "run_as": { + "description": "Specifies the identity that will be used to run the alert.\nThis field allows you to configure alerts to run as a specific user or service principal.\n- For user identity: Set `user_name` to the email of an active workspace user. Users can only set this to their own email.\n- For service principal: Set `service_principal_name` to the application ID. Requires the `servicePrincipal/user` role.\nIf not specified, the alert will run as the request user.", "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/sql.AlertV2RunAs" }, "run_as_user_name": { + "description": "The run as username or application ID of service principal.\nOn Create and Update, this field can be set to application ID of an active service principal. Setting this field requires the servicePrincipal/user role.\nDeprecated: Use `run_as` field instead. 
This field will be removed in a future release.", "$ref": "#/$defs/string", "deprecationMessage": "This field is deprecated", "deprecated": true @@ -103,6 +111,7 @@ "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/sql.CronSchedule" }, "warehouse_id": { + "description": "ID of the SQL warehouse attached to the alert.", "$ref": "#/$defs/string" } }, @@ -333,33 +342,42 @@ "type": "object", "properties": { "comment": { + "description": "User-provided free-form text description.", "$ref": "#/$defs/string" }, "connection_name": { + "description": "The name of the connection to an external data source.", "$ref": "#/$defs/string" }, "grants": { "$ref": "#/$defs/slice/github.com/databricks/databricks-sdk-go/service/catalog.PrivilegeAssignment" }, "lifecycle": { + "description": "Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed.", "$ref": "#/$defs/github.com/databricks/cli/bundle/config/resources.Lifecycle" }, "name": { + "description": "Name of catalog.", "$ref": "#/$defs/string" }, "options": { + "description": "A map of key-value properties attached to the securable.", "$ref": "#/$defs/map/string" }, "properties": { + "description": "A map of key-value properties attached to the securable.", "$ref": "#/$defs/map/string" }, "provider_name": { + "description": "The name of delta sharing provider.\n\nA Delta Sharing catalog is a catalog that is based on a Delta share on a remote sharing server.", "$ref": "#/$defs/string" }, "share_name": { + "description": "The name of the share under the share provider.", "$ref": "#/$defs/string" }, "storage_root": { + "description": "Storage root URL for managed tables within catalog.", "$ref": "#/$defs/string" } }, @@ -413,9 +431,11 @@ "$ref": "#/$defs/map/string" }, "data_security_mode": { + "description": "Data security mode decides what data governance model to use when accessing data\nfrom a cluster.\n\nThe following 
modes can only be used when `kind = CLASSIC_PREVIEW`.\n* `DATA_SECURITY_MODE_AUTO`: Databricks will choose the most appropriate access mode depending on your compute configuration.\n* `DATA_SECURITY_MODE_STANDARD`: Alias for `USER_ISOLATION`.\n* `DATA_SECURITY_MODE_DEDICATED`: Alias for `SINGLE_USER`.\n\nThe following modes can be used regardless of `kind`.\n* `NONE`: No security isolation for multiple users sharing the cluster. Data governance features are not available in this mode.\n* `SINGLE_USER`: A secure cluster that can only be exclusively used by a single user specified in `single_user_name`. Most programming languages, cluster features and data governance features are available in this mode.\n* `USER_ISOLATION`: A secure cluster that can be shared by multiple users. Cluster users are fully isolated so that they cannot see each other's data and credentials. Most data governance features are supported in this mode. But programming languages and cluster features might be limited.\n\nThe following modes are deprecated starting with Databricks Runtime 15.0 and\nwill be removed for future Databricks Runtime versions:\n\n* `LEGACY_TABLE_ACL`: This mode is for users migrating from legacy Table ACL clusters.\n* `LEGACY_PASSTHROUGH`: This mode is for users migrating from legacy Passthrough on high concurrency clusters.\n* `LEGACY_SINGLE_USER`: This mode is for users migrating from legacy Passthrough on standard clusters.\n* `LEGACY_SINGLE_USER_STANDARD`: This mode provides a way that doesn’t have UC nor passthrough enabled.", "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/compute.DataSecurityMode" }, "docker_image": { + "description": "Custom docker image BYOC", "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/compute.DockerImage" }, "driver_instance_pool_id": { @@ -455,6 +475,7 @@ "$ref": "#/$defs/bool" }, "kind": { + "description": "The kind of compute described by this compute specification.\n\nDepending on `kind`, different 
validations and default values will be applied.\n\nClusters with `kind = CLASSIC_PREVIEW` support the following fields, whereas clusters with no specified `kind` do not.\n* [is_single_node](/api/workspace/clusters/create#is_single_node)\n* [use_ml_runtime](/api/workspace/clusters/create#use_ml_runtime)\n* [data_security_mode](/api/workspace/clusters/create#data_security_mode) set to `DATA_SECURITY_MODE_AUTO`, `DATA_SECURITY_MODE_DEDICATED`, or `DATA_SECURITY_MODE_STANDARD`\n\nBy using the [simple form](https://docs.databricks.com/compute/simple-form.html), your clusters are automatically using `kind = CLASSIC_PREVIEW`.", "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/compute.Kind" }, "lifecycle": { @@ -481,6 +502,7 @@ "$ref": "#/$defs/int" }, "runtime_engine": { + "description": "Determines the cluster's runtime engine, either standard or Photon.\n\nThis field is not compatible with legacy `spark_version` values that contain `-photon-`.\nRemove `-photon-` from the `spark_version` and set `runtime_engine` to `PHOTON`.\n\nIf left unspecified, the runtime engine defaults to standard unless the spark_version\ncontains -photon-, in which case Photon will be used.", "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/compute.RuntimeEngine" }, "single_user_name": { @@ -516,6 +538,7 @@ "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/compute.NodeTypeFlexibility" }, "workload_type": { + "description": "Cluster Attributes showing for clusters workload types.", "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/compute.WorkloadType" } }, @@ -893,39 +916,50 @@ "type": "object", "properties": { "comment": { + "description": "User-provided free-form text description.", "$ref": "#/$defs/string" }, "credential_name": { + "description": "Name of the storage credential used with this location.", "$ref": "#/$defs/string" }, "enable_file_events": { + "description": "Whether to enable file events on this external location. 
Default to `true`. Set to `false` to disable file events.\nThe actual applied value may differ due to server-side defaults; check `effective_enable_file_events` for the effective state.", "$ref": "#/$defs/bool" }, "encryption_details": { + "description": "Encryption options that apply to clients connecting to cloud storage.", "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/catalog.EncryptionDetails" }, "fallback": { + "description": "Indicates whether fallback mode is enabled for this external location. When fallback mode is enabled, the access to the location falls back to cluster credentials if UC credentials are not sufficient.", "$ref": "#/$defs/bool" }, "file_event_queue": { + "description": "File event queue settings. If `enable_file_events` is not `false`, must be defined and have exactly one of the documented properties.", "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/catalog.FileEventQueue" }, "grants": { "$ref": "#/$defs/slice/github.com/databricks/databricks-sdk-go/service/catalog.PrivilegeAssignment" }, "lifecycle": { + "description": "Lifecycle is a struct that contains the lifecycle settings for a resource. 
It controls the behavior of the resource when it is deployed or destroyed.", "$ref": "#/$defs/github.com/databricks/cli/bundle/config/resources.Lifecycle" }, "name": { + "description": "Name of the external location.", "$ref": "#/$defs/string" }, "read_only": { + "description": "Indicates whether the external location is read-only.", "$ref": "#/$defs/bool" }, "skip_validation": { + "description": "Skips validation of the storage credential associated with the external location.", "$ref": "#/$defs/bool" }, "url": { + "description": "Path URL of the external location.", "$ref": "#/$defs/string" } }, @@ -972,6 +1006,7 @@ "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.GitSource" }, "health": { + "description": "An optional set of health rules that can be defined for this job.", "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.JobsHealthRules" }, "job_clusters": { @@ -1010,6 +1045,7 @@ "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.QueueSettings" }, "run_as": { + "description": "The user or service principal that the job runs as, if specified in the request.\nThis field indicates the explicit configuration of `run_as` for the job.\nTo find the value in all cases, explicit or implicit, use `run_as_user_name`.", "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.JobRunAs" }, "schedule": { @@ -1523,6 +1559,7 @@ "$ref": "#/$defs/string" }, "run_as": { + "description": "Write-only setting, available only in Create/Update calls. Specifies the user or service principal that the pipeline runs as. If not specified, the pipeline runs as the user who created the pipeline.\n\nOnly `user_name` or `service_principal_name` can be specified. If both are specified, an error is thrown.", "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/pipelines.RunAs" }, "schema": { @@ -1550,7 +1587,7 @@ "trigger": { "description": "Which pipeline trigger to use. 
Deprecated: Use `continuous` instead.", "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/pipelines.PipelineTrigger", - "deprecationMessage": "Use continuous instead", + "deprecationMessage": "This field is deprecated", "deprecated": true }, "usage_policy_id": { @@ -1624,30 +1661,38 @@ "$ref": "#/$defs/string" }, "expire_time": { + "description": "Absolute expiration timestamp. When set, the branch will expire at this time.", "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/common/types/time.Time" }, "is_protected": { + "description": "When set to true, protects the branch from deletion and reset. Associated compute endpoints and the project cannot be deleted while the branch is protected.", "$ref": "#/$defs/bool" }, "lifecycle": { + "description": "Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed.", "$ref": "#/$defs/github.com/databricks/cli/bundle/config/resources.Lifecycle" }, "no_expiry": { + "description": "Explicitly disable expiration. When set to true, the branch will not expire.\nIf set to false, the request is invalid; provide either ttl or expire_time instead.", "$ref": "#/$defs/bool" }, "parent": { "$ref": "#/$defs/string" }, "source_branch": { + "description": "The name of the source branch from which this branch was created (data lineage for point-in-time recovery).\nIf not specified, defaults to the project's default branch.\nFormat: projects/{project_id}/branches/{branch_id}", "$ref": "#/$defs/string" }, "source_branch_lsn": { + "description": "The Log Sequence Number (LSN) on the source branch from which this branch was created.", "$ref": "#/$defs/string" }, "source_branch_time": { + "description": "The point in time on the source branch from which this branch was created.", "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/common/types/time.Time" }, "ttl": { + "description": "Relative time-to-live duration. 
When set, the branch will expire at creation_time + ttl.", "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/common/types/duration.Duration" } }, @@ -1669,36 +1714,47 @@ "type": "object", "properties": { "autoscaling_limit_max_cu": { + "description": "The maximum number of Compute Units. Minimum value is 0.5.", "$ref": "#/$defs/float64" }, "autoscaling_limit_min_cu": { + "description": "The minimum number of Compute Units. Minimum value is 0.5.", "$ref": "#/$defs/float64" }, "disabled": { + "description": "Whether to restrict connections to the compute endpoint.\nEnabling this option schedules a suspend compute operation.\nA disabled compute endpoint cannot be enabled by a connection or\nconsole action.", "$ref": "#/$defs/bool" }, "endpoint_id": { "$ref": "#/$defs/string" }, "endpoint_type": { + "description": "The endpoint type. A branch can only have one READ_WRITE endpoint.", "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/postgres.EndpointType" }, "group": { - "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/postgres.EndpointGroupSpec" + "description": "Settings for optional HA configuration of the endpoint. If unspecified, the endpoint defaults\nto non HA settings, with a single compute backing the endpoint (and no readable secondaries\nfor Read/Write endpoints).", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/postgres.EndpointGroupSpec", + "x-databricks-preview": "PRIVATE", + "doNotSuggest": true }, "lifecycle": { + "description": "Lifecycle is a struct that contains the lifecycle settings for a resource. 
It controls the behavior of the resource when it is deployed or destroyed.", "$ref": "#/$defs/github.com/databricks/cli/bundle/config/resources.Lifecycle" }, "no_suspension": { + "description": "When set to true, explicitly disables automatic suspension (never suspend).\nShould be set to true when provided.", "$ref": "#/$defs/bool" }, "parent": { "$ref": "#/$defs/string" }, "settings": { + "description": "A collection of settings for a compute endpoint.", "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/postgres.EndpointSettings" }, "suspend_timeout_duration": { + "description": "Duration of inactivity after which the compute endpoint is automatically suspended.\nIf specified should be between 60s and 604800s (1 minute to 1 week).", "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/common/types/duration.Duration" } }, @@ -1721,27 +1777,34 @@ "type": "object", "properties": { "budget_policy_id": { + "description": "The desired budget policy to associate with the project.\nSee status.budget_policy_id for the policy that is actually applied to the project.", "$ref": "#/$defs/string" }, "custom_tags": { + "description": "Custom tags to associate with the project. Forwarded to LBM for billing and cost tracking.\nTo update tags, provide the new tag list and include \"spec.custom_tags\" in the update_mask.\nTo clear all tags, provide an empty list and include \"spec.custom_tags\" in the update_mask.\nTo preserve existing tags, omit this field from the update_mask (or use wildcard \"*\" which auto-excludes empty tags).", "$ref": "#/$defs/slice/github.com/databricks/databricks-sdk-go/service/postgres.ProjectCustomTag" }, "default_endpoint_settings": { + "description": "A collection of settings for a compute endpoint.", "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/postgres.ProjectDefaultEndpointSettings" }, "display_name": { + "description": "Human-readable project name. 
Length should be between 1 and 256 characters.", "$ref": "#/$defs/string" }, "history_retention_duration": { + "description": "The number of seconds to retain the shared history for point in time recovery for all branches in this project. Value should be between 0s and 2592000s (up to 30 days).", "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/common/types/duration.Duration" }, "lifecycle": { + "description": "Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed.", "$ref": "#/$defs/github.com/databricks/cli/bundle/config/resources.Lifecycle" }, "permissions": { "$ref": "#/$defs/slice/github.com/databricks/cli/bundle/config/resources.DatabaseProjectPermission" }, "pg_version": { + "description": "The major Postgres version number. Supported versions are 16 and 17.", "$ref": "#/$defs/int" }, "project_id": { @@ -1849,9 +1912,11 @@ "type": "object", "properties": { "aliases": { + "description": "List of aliases associated with the registered model", "$ref": "#/$defs/slice/github.com/databricks/databricks-sdk-go/service/catalog.RegisteredModelAlias" }, "browse_only": { + "description": "Indicates whether the principal is limited to retrieving metadata for the associated object through the BROWSE privilege when include_browse is enabled in the request.", "$ref": "#/$defs/bool" }, "catalog_name": { @@ -1863,12 +1928,15 @@ "$ref": "#/$defs/string" }, "created_at": { + "description": "Creation timestamp of the registered model in milliseconds since the Unix epoch", "$ref": "#/$defs/int64" }, "created_by": { + "description": "The identifier of the user who created the registered model", "$ref": "#/$defs/string" }, "full_name": { + "description": "The three-level (fully qualified) name of the registered model", "$ref": "#/$defs/string" }, "grants": { @@ -1879,6 +1947,7 @@ "$ref": "#/$defs/github.com/databricks/cli/bundle/config/resources.Lifecycle" }, "metastore_id": { + 
"description": "The unique identifier of the metastore", "$ref": "#/$defs/string" }, "name": { @@ -1886,6 +1955,7 @@ "$ref": "#/$defs/string" }, "owner": { + "description": "The identifier of the user who owns the registered model", "$ref": "#/$defs/string" }, "schema_name": { @@ -1897,9 +1967,11 @@ "$ref": "#/$defs/string" }, "updated_at": { + "description": "Last-update timestamp of the registered model in milliseconds since the Unix epoch", "$ref": "#/$defs/int64" }, "updated_by": { + "description": "The identifier of the user who updated the registered model last time", "$ref": "#/$defs/string" } }, @@ -1937,6 +2009,7 @@ "$ref": "#/$defs/string" }, "properties": { + "description": "A map of key-value properties attached to the securable.", "$ref": "#/$defs/map/string" }, "storage_root": { @@ -2099,6 +2172,7 @@ "$ref": "#/$defs/slice/github.com/databricks/cli/bundle/config/resources.SqlWarehousePermission" }, "spot_instance_policy": { + "description": "Configurations whether the endpoint should use spot instances.", "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/sql.SpotInstancePolicy" }, "tags": { @@ -2106,6 +2180,7 @@ "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/sql.EndpointTags" }, "warehouse_type": { + "description": "Warehouse type: `PRO` or `CLASSIC`. If you want to use serverless compute,\nyou must set to `PRO` and also set the field `enable_serverless_compute` to `true`.", "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/sql.CreateWarehouseRequestWarehouseType" } }, @@ -2170,18 +2245,23 @@ "description": "Next field marker: 18", "properties": { "database_instance_name": { + "description": "Name of the target database instance. This is required when creating synced database tables in standard catalogs.\nThis is optional when creating synced database tables in registered catalogs. 
If this field is specified\nwhen creating synced database tables in registered catalogs, the database instance name MUST\nmatch that of the registered catalog (or the request will be rejected).", "$ref": "#/$defs/string" }, "lifecycle": { + "description": "Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed.", "$ref": "#/$defs/github.com/databricks/cli/bundle/config/resources.Lifecycle" }, "logical_database_name": { + "description": "Target Postgres database object (logical database) name for this table.\n\nWhen creating a synced table in a registered Postgres catalog, the\ntarget Postgres database name is inferred to be that of the registered catalog.\nIf this field is specified in this scenario, the Postgres database name MUST\nmatch that of the registered catalog (or the request will be rejected).\n\nWhen creating a synced table in a standard catalog, this field is required.\nIn this scenario, specifying this field will allow targeting an arbitrary postgres database.\nNote that this has implications for the `create_database_objects_is_missing` field in `spec`.", "$ref": "#/$defs/string" }, "name": { + "description": "Full three-part (catalog, schema, table) name of the table.", "$ref": "#/$defs/string" }, "spec": { + "description": "Specification of a synced database table.", "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/database.SyncedTableSpec" } }, @@ -2229,6 +2309,7 @@ "$ref": "#/$defs/string" }, "volume_type": { + "description": "The type of the volume. 
An external volume is located in the specified external location.\nA managed volume is located in the default location which is specified by the parent schema, or the parent catalog, or the Metastore.\n[Learn more](https://docs.databricks.com/aws/en/volumes/managed-vs-external)", "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/catalog.VolumeType" } }, @@ -3035,6 +3116,7 @@ "$ref": "#/$defs/slice/string" }, "deployment_id": { + "description": "The unique id of the deployment.", "$ref": "#/$defs/string" }, "env_vars": { @@ -3048,9 +3130,11 @@ "doNotSuggest": true }, "mode": { + "description": "The mode of which the deployment will manage the source code.", "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/apps.AppDeploymentMode" }, "source_code_path": { + "description": "The workspace file system path of the source code used to create the app deployment. This is different from\n`deployment_artifacts.source_code_path`, which is the path used by the deployed app. The former refers\nto the original source code location of the app in the workspace during deployment creation, whereas\nthe latter provides a system generated stable snapshotted source code path used by the deployment.", "$ref": "#/$defs/string" } }, @@ -3068,6 +3152,7 @@ "type": "object", "properties": { "source_code_path": { + "description": "The snapshotted workspace file system path of the source code loaded by the deployed app.", "$ref": "#/$defs/string" } }, @@ -3322,9 +3407,11 @@ "type": "object", "properties": { "id": { + "description": "Id of the job to grant permission on.", "$ref": "#/$defs/string" }, "permission": { + "description": "Permissions to grant on the Job. 
Supported permissions are: \"CAN_MANAGE\", \"IS_OWNER\", \"CAN_MANAGE_RUN\", \"CAN_VIEW\".", "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/apps.AppResourceJobJobPermission" } }, @@ -3363,12 +3450,15 @@ "type": "object", "properties": { "key": { + "description": "Key of the secret to grant permission on.", "$ref": "#/$defs/string" }, "permission": { + "description": "Permission to grant on the secret scope. For secrets, only one permission is allowed. Permission must be one of: \"READ\", \"WRITE\", \"MANAGE\".", "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/apps.AppResourceSecretSecretPermission" }, "scope": { + "description": "Scope of the secret to grant permission on.", "$ref": "#/$defs/string" } }, @@ -3408,9 +3498,11 @@ "type": "object", "properties": { "name": { + "description": "Name of the serving endpoint to grant permission on.", "$ref": "#/$defs/string" }, "permission": { + "description": "Permission to grant on the serving endpoint. Supported permissions are: \"CAN_MANAGE\", \"CAN_QUERY\", \"CAN_VIEW\".", "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/apps.AppResourceServingEndpointServingEndpointPermission" } }, @@ -3448,9 +3540,11 @@ "type": "object", "properties": { "id": { + "description": "Id of the SQL warehouse to grant permission on.", "$ref": "#/$defs/string" }, "permission": { + "description": "Permission to grant on the SQL warehouse. 
Supported permissions are: \"CAN_MANAGE\", \"CAN_USE\", \"IS_OWNER\".", "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/apps.AppResourceSqlWarehouseSqlWarehousePermission" } }, @@ -3581,7 +3675,8 @@ "type": "string", "enum": [ "MEDIUM", - "LARGE" + "LARGE", + "LIQUID" ] }, { @@ -3712,6 +3807,7 @@ "type": "object", "properties": { "queue_url": { + "description": "The AQS queue url in the format https://sqs.{region}.amazonaws.com/{account id}/{queue name}.\nOnly required for provided_sqs.", "$ref": "#/$defs/string" } }, @@ -3729,12 +3825,15 @@ "type": "object", "properties": { "queue_url": { + "description": "The AQS queue url in the format https://{storage account}.queue.core.windows.net/{queue name}\nOnly required for provided_aqs.", "$ref": "#/$defs/string" }, "resource_group": { + "description": "Optional resource group for the queue, event grid subscription, and external location storage\naccount.\nOnly required for locations with a service principal storage credential", "$ref": "#/$defs/string" }, "subscription_id": { + "description": "Optional subscription id for the queue, event grid subscription, and external location storage\naccount.\nRequired for locations with a service principal storage credential", "$ref": "#/$defs/string" } }, @@ -3753,6 +3852,7 @@ "description": "Encryption options that apply to clients connecting to cloud storage.", "properties": { "sse_encryption_details": { + "description": "Server-Side Encryption properties for clients communicating with AWS s3.", "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/catalog.SseEncryptionDetails" } }, @@ -3802,6 +3902,7 @@ "type": "object", "properties": { "subscription_name": { + "description": "The Pub/Sub subscription name in the format projects/{project}/subscriptions/{subscription name}.\nOnly required for provided_pubsub.", "$ref": "#/$defs/string" } }, @@ -4138,7 +4239,13 @@ "CREATE_CLEAN_ROOM", "MODIFY_CLEAN_ROOM", "EXECUTE_CLEAN_ROOM_TASK", - 
"EXTERNAL_USE_SCHEMA" + "EXTERNAL_USE_SCHEMA", + "VIEW_OBJECT", + "MANAGE_GRANTS", + "INSERT", + "UPDATE", + "DELETE", + "VIEW_ADMIN_METADATA" ] }, { @@ -4179,15 +4286,19 @@ "$ref": "#/$defs/string" }, "catalog_name": { + "description": "The name of the catalog containing the model version", "$ref": "#/$defs/string" }, "id": { + "description": "The unique identifier of the alias", "$ref": "#/$defs/string" }, "model_name": { + "description": "The name of the parent registered model of the model version, relative to parent schema", "$ref": "#/$defs/string" }, "schema_name": { + "description": "The name of the schema containing the model version, relative to parent catalog", "$ref": "#/$defs/string" }, "version_num": { @@ -4210,9 +4321,11 @@ "description": "Server-Side Encryption properties for clients communicating with AWS s3.", "properties": { "algorithm": { + "description": "Sets the value of the 'x-amz-server-side-encryption' header in S3 request.", "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/catalog.SseEncryptionDetailsAlgorithm" }, "aws_kms_key_arn": { + "description": "Optional. 
The ARN of the SSE-KMS key used with the S3 location, when algorithm = \"SSE-KMS\".\nSets the value of the 'x-amz-server-side-encryption-aws-kms-key-id' header.", "$ref": "#/$defs/string" } }, @@ -4230,8 +4343,6 @@ "type": "string", "description": "SSE algorithm to use for encrypting S3 objects", "enum": [ - "AWS_SSE_S3", - "AWS_SSE_KMS", "AWS_SSE_KMS", "AWS_SSE_S3" ] @@ -4308,6 +4419,7 @@ "description": "Attributes set during cluster creation which are related to Amazon Web Services.", "properties": { "availability": { + "description": "Availability type used for all subsequent nodes past the `first_on_demand` ones.\n\nNote: If `first_on_demand` is zero, this availability type will be used for the entire cluster.", "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/compute.AwsAvailability" }, "ebs_volume_count": { @@ -4327,6 +4439,7 @@ "$ref": "#/$defs/int" }, "ebs_volume_type": { + "description": "The type of EBS volumes that will be launched with this cluster.", "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/compute.EbsVolumeType" }, "first_on_demand": { @@ -4378,6 +4491,7 @@ "description": "Attributes set during cluster creation which are related to Microsoft Azure.", "properties": { "availability": { + "description": "Availability type used for all subsequent nodes past the `first_on_demand` ones.\nNote: If `first_on_demand` is zero, this availability\ntype will be used for the entire cluster.", "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/compute.AzureAvailability" }, "first_on_demand": { @@ -4506,9 +4620,11 @@ "$ref": "#/$defs/map/string" }, "data_security_mode": { + "description": "Data security mode decides what data governance model to use when accessing data\nfrom a cluster.\n\nThe following modes can only be used when `kind = CLASSIC_PREVIEW`.\n* `DATA_SECURITY_MODE_AUTO`: Databricks will choose the most appropriate access mode depending on your compute configuration.\n* `DATA_SECURITY_MODE_STANDARD`: 
Alias for `USER_ISOLATION`.\n* `DATA_SECURITY_MODE_DEDICATED`: Alias for `SINGLE_USER`.\n\nThe following modes can be used regardless of `kind`.\n* `NONE`: No security isolation for multiple users sharing the cluster. Data governance features are not available in this mode.\n* `SINGLE_USER`: A secure cluster that can only be exclusively used by a single user specified in `single_user_name`. Most programming languages, cluster features and data governance features are available in this mode.\n* `USER_ISOLATION`: A secure cluster that can be shared by multiple users. Cluster users are fully isolated so that they cannot see each other's data and credentials. Most data governance features are supported in this mode. But programming languages and cluster features might be limited.\n\nThe following modes are deprecated starting with Databricks Runtime 15.0 and\nwill be removed for future Databricks Runtime versions:\n\n* `LEGACY_TABLE_ACL`: This mode is for users migrating from legacy Table ACL clusters.\n* `LEGACY_PASSTHROUGH`: This mode is for users migrating from legacy Passthrough on high concurrency clusters.\n* `LEGACY_SINGLE_USER`: This mode is for users migrating from legacy Passthrough on standard clusters.\n* `LEGACY_SINGLE_USER_STANDARD`: This mode provides a way that doesn’t have UC nor passthrough enabled.", "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/compute.DataSecurityMode" }, "docker_image": { + "description": "Custom docker image BYOC", "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/compute.DockerImage" }, "driver_instance_pool_id": { @@ -4548,6 +4664,7 @@ "$ref": "#/$defs/bool" }, "kind": { + "description": "The kind of compute described by this compute specification.\n\nDepending on `kind`, different validations and default values will be applied.\n\nClusters with `kind = CLASSIC_PREVIEW` support the following fields, whereas clusters with no specified `kind` do not.\n* 
[is_single_node](/api/workspace/clusters/create#is_single_node)\n* [use_ml_runtime](/api/workspace/clusters/create#use_ml_runtime)\n* [data_security_mode](/api/workspace/clusters/create#data_security_mode) set to `DATA_SECURITY_MODE_AUTO`, `DATA_SECURITY_MODE_DEDICATED`, or `DATA_SECURITY_MODE_STANDARD`\n\nBy using the [simple form](https://docs.databricks.com/compute/simple-form.html), your clusters are automatically using `kind = CLASSIC_PREVIEW`.", "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/compute.Kind" }, "node_type_id": { @@ -4567,6 +4684,7 @@ "$ref": "#/$defs/int" }, "runtime_engine": { + "description": "Determines the cluster's runtime engine, either standard or Photon.\n\nThis field is not compatible with legacy `spark_version` values that contain `-photon-`.\nRemove `-photon-` from the `spark_version` and set `runtime_engine` to `PHOTON`.\n\nIf left unspecified, the runtime engine defaults to standard unless the spark_version\ncontains -photon-, in which case Photon will be used.", "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/compute.RuntimeEngine" }, "single_user_name": { @@ -4602,6 +4720,7 @@ "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/compute.NodeTypeFlexibility" }, "workload_type": { + "description": "Cluster Attributes showing for clusters workload types.", "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/compute.WorkloadType" } }, @@ -4687,6 +4806,7 @@ "type": "object", "properties": { "basic_auth": { + "description": "Basic auth with username and password", "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/compute.DockerBasicAuth" }, "url": { @@ -4743,6 +4863,7 @@ "$ref": "#/$defs/string" }, "java_dependencies": { + "description": "List of java dependencies. Each dependency is a string representing a java library path. 
For example: `/Volumes/path/to/test.jar`.", "$ref": "#/$defs/slice/string" } }, @@ -4761,6 +4882,7 @@ "description": "Attributes set during cluster creation which are related to GCP.", "properties": { "availability": { + "description": "This field determines whether the spark executors will be scheduled to run on preemptible\nVMs, on-demand VMs, or preemptible VMs with a fallback to on-demand VMs if the former is unavailable.", "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/compute.GcpAvailability" }, "boot_disk_size": { @@ -5796,6 +5918,7 @@ "description": "Configures the Lakeview Dashboard job task type.", "properties": { "dashboard_id": { + "description": "The identifier of the dashboard to refresh.", "$ref": "#/$defs/string" }, "filters": { @@ -5805,6 +5928,7 @@ "doNotSuggest": true }, "subscription": { + "description": "Optional: subscription configuration for sending the dashboard snapshot.", "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.Subscription" }, "warehouse_id": { @@ -6249,6 +6373,7 @@ "$ref": "#/$defs/string" }, "spec": { + "description": "The environment entity used to preserve serverless environment side panel, jobs' environment for non-notebook task, and DLT's environment for classic and serverless pipelines.\nIn this minimal environment spec, only pip dependencies are supported.", "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/compute.Environment" } }, @@ -6427,9 +6552,11 @@ "type": "object", "properties": { "metric": { + "description": "Specifies the health metric that is being evaluated for a particular health rule.\n\n* `RUN_DURATION_SECONDS`: Expected total time for a run in seconds.\n* `STREAMING_BACKLOG_BYTES`: An estimate of the maximum bytes of data waiting to be consumed across all streams. This metric is in Public Preview.\n* `STREAMING_BACKLOG_RECORDS`: An estimate of the maximum offset lag across all streams. 
This metric is in Public Preview.\n* `STREAMING_BACKLOG_SECONDS`: An estimate of the maximum consumer delay across all streams. This metric is in Public Preview.\n* `STREAMING_BACKLOG_FILES`: An estimate of the maximum number of outstanding files across all streams. This metric is in Public Preview.", "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.JobsHealthMetric" }, "op": { + "description": "Specifies the operator used to compare the health metric value with the specified threshold.", "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.JobsHealthOperator" }, "value": { @@ -7243,6 +7370,7 @@ "$ref": "#/$defs/bool" }, "subscribers": { + "description": "The list of subscribers to send the snapshot of the dashboard to.", "$ref": "#/$defs/slice/github.com/databricks/databricks-sdk-go/service/jobs.SubscriptionSubscriber" } }, @@ -7260,9 +7388,11 @@ "type": "object", "properties": { "destination_id": { + "description": "A snapshot of the dashboard will be sent to the destination when the `destination_id` field is present.", "$ref": "#/$defs/string" }, "user_name": { + "description": "A snapshot of the dashboard will be sent to the user's email when the `user_name` field is present.", "$ref": "#/$defs/string" } }, @@ -7385,6 +7515,7 @@ "doNotSuggest": true }, "health": { + "description": "An optional set of health rules that can be defined for this job.", "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.JobsHealthRules" }, "job_cluster_key": { @@ -7988,6 +8119,7 @@ "$ref": "#/$defs/string" }, "netsuite_jar_path": { + "description": "Netsuite only configuration. 
When the field is set for a netsuite connector,\nthe jar stored in the field will be validated and added to the classpath of\npipeline's cluster.", "$ref": "#/$defs/string", "x-databricks-preview": "PRIVATE", "doNotSuggest": true @@ -8105,6 +8237,8 @@ "enum": [ "MYSQL", "POSTGRESQL", + "REDSHIFT", + "SQLDW", "SQLSERVER", "SALESFORCE", "BIGQUERY", @@ -8117,6 +8251,37 @@ "TERADATA", "SHAREPOINT", "DYNAMICS365", + "CONFLUENCE", + "META_MARKETING", + "GOOGLE_ADS", + "TIKTOK_ADS", + "SALESFORCE_MARKETING_CLOUD", + "HUBSPOT", + "WORKDAY_HCM", + "GUIDEWIRE", + "ZENDESK", + "SLACK_AUDIT_LOGS", + "CROWDSTRIKE_EVENT_STREAM", + "WORKDAY_ACTIVITY_LOGGING", + "AKAMAI_WAF", + "VEEVA", + "VEEVA_VAULT", + "M365_AUDIT_LOGS", + "OKTA_SYSTEM_LOGS", + "ONE_PASSWORD_EVENT_LOGS", + "PROOFPOINT_SIEM", + "WIZ_AUDIT_LOGS", + "GITHUB", + "OUTLOOK", + "SMARTSHEET", + "MICROSOFT_TEAMS", + "ADOBE_CAMPAIGNS", + "LINKEDIN_ADS", + "X_ADS", + "BING_ADS", + "GOOGLE_SEARCH_CONSOLE", + "PINTEREST_ADS", + "REDDIT_ADS", "FOREIGN_CATALOG" ] }, @@ -8780,6 +8945,7 @@ "$ref": "#/$defs/slice/string" }, "workday_report_parameters": { + "description": "(Optional) Additional custom parameters for Workday Report", "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/pipelines.IngestionPipelineDefinitionWorkdayReportParameters", "x-databricks-preview": "PRIVATE", "doNotSuggest": true @@ -10179,6 +10345,7 @@ "type": "object", "properties": { "aggregation": { + "description": "If not set, the behavior is equivalent to using `First row` in the UI.", "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/sql.Aggregation" }, "display": { diff --git a/python/databricks/bundles/catalogs/_models/catalog.py b/python/databricks/bundles/catalogs/_models/catalog.py index 368e96a3b2..1ca07dc7af 100644 --- a/python/databricks/bundles/catalogs/_models/catalog.py +++ b/python/databricks/bundles/catalogs/_models/catalog.py @@ -25,24 +25,53 @@ class Catalog(Resource): """""" name: VariableOr[str] + """ + 
Name of catalog. + """ comment: VariableOrOptional[str] = None + """ + User-provided free-form text description. + """ connection_name: VariableOrOptional[str] = None + """ + The name of the connection to an external data source. + """ grants: VariableOrList[PrivilegeAssignment] = field(default_factory=list) lifecycle: VariableOrOptional[Lifecycle] = None + """ + Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. + """ options: VariableOrDict[str] = field(default_factory=dict) + """ + A map of key-value properties attached to the securable. + """ properties: VariableOrDict[str] = field(default_factory=dict) + """ + A map of key-value properties attached to the securable. + """ provider_name: VariableOrOptional[str] = None + """ + The name of delta sharing provider. + + A Delta Sharing catalog is a catalog that is based on a Delta share on a remote sharing server. + """ share_name: VariableOrOptional[str] = None + """ + The name of the share under the share provider. + """ storage_root: VariableOrOptional[str] = None + """ + Storage root URL for managed tables within catalog. + """ @classmethod def from_dict(cls, value: "CatalogDict") -> "Self": @@ -56,24 +85,53 @@ class CatalogDict(TypedDict, total=False): """""" name: VariableOr[str] + """ + Name of catalog. + """ comment: VariableOrOptional[str] + """ + User-provided free-form text description. + """ connection_name: VariableOrOptional[str] + """ + The name of the connection to an external data source. + """ grants: VariableOrList[PrivilegeAssignmentParam] lifecycle: VariableOrOptional[LifecycleParam] + """ + Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. + """ options: VariableOrDict[str] + """ + A map of key-value properties attached to the securable. 
+ """ properties: VariableOrDict[str] + """ + A map of key-value properties attached to the securable. + """ provider_name: VariableOrOptional[str] + """ + The name of delta sharing provider. + + A Delta Sharing catalog is a catalog that is based on a Delta share on a remote sharing server. + """ share_name: VariableOrOptional[str] + """ + The name of the share under the share provider. + """ storage_root: VariableOrOptional[str] + """ + Storage root URL for managed tables within catalog. + """ CatalogParam = CatalogDict | Catalog diff --git a/python/databricks/bundles/catalogs/_models/privilege.py b/python/databricks/bundles/catalogs/_models/privilege.py index 20f4e1f557..39ab7de33f 100644 --- a/python/databricks/bundles/catalogs/_models/privilege.py +++ b/python/databricks/bundles/catalogs/_models/privilege.py @@ -53,6 +53,12 @@ class Privilege(Enum): MODIFY_CLEAN_ROOM = "MODIFY_CLEAN_ROOM" EXECUTE_CLEAN_ROOM_TASK = "EXECUTE_CLEAN_ROOM_TASK" EXTERNAL_USE_SCHEMA = "EXTERNAL_USE_SCHEMA" + VIEW_OBJECT = "VIEW_OBJECT" + MANAGE_GRANTS = "MANAGE_GRANTS" + INSERT = "INSERT" + UPDATE = "UPDATE" + DELETE = "DELETE" + VIEW_ADMIN_METADATA = "VIEW_ADMIN_METADATA" PrivilegeParam = ( @@ -107,6 +113,12 @@ class Privilege(Enum): "MODIFY_CLEAN_ROOM", "EXECUTE_CLEAN_ROOM_TASK", "EXTERNAL_USE_SCHEMA", + "VIEW_OBJECT", + "MANAGE_GRANTS", + "INSERT", + "UPDATE", + "DELETE", + "VIEW_ADMIN_METADATA", ] | Privilege ) diff --git a/python/databricks/bundles/jobs/_models/aws_attributes.py b/python/databricks/bundles/jobs/_models/aws_attributes.py index 62ddf7a765..64aa3ab01c 100644 --- a/python/databricks/bundles/jobs/_models/aws_attributes.py +++ b/python/databricks/bundles/jobs/_models/aws_attributes.py @@ -24,6 +24,11 @@ class AwsAttributes: """ availability: VariableOrOptional[AwsAvailability] = None + """ + Availability type used for all subsequent nodes past the `first_on_demand` ones. + + Note: If `first_on_demand` is zero, this availability type will be used for the entire cluster. 
+ """ ebs_volume_count: VariableOrOptional[int] = None """ @@ -63,6 +68,9 @@ class AwsAttributes: """ ebs_volume_type: VariableOrOptional[EbsVolumeType] = None + """ + The type of EBS volumes that will be launched with this cluster. + """ first_on_demand: VariableOrOptional[int] = None """ @@ -124,6 +132,11 @@ class AwsAttributesDict(TypedDict, total=False): """""" availability: VariableOrOptional[AwsAvailabilityParam] + """ + Availability type used for all subsequent nodes past the `first_on_demand` ones. + + Note: If `first_on_demand` is zero, this availability type will be used for the entire cluster. + """ ebs_volume_count: VariableOrOptional[int] """ @@ -163,6 +176,9 @@ class AwsAttributesDict(TypedDict, total=False): """ ebs_volume_type: VariableOrOptional[EbsVolumeTypeParam] + """ + The type of EBS volumes that will be launched with this cluster. + """ first_on_demand: VariableOrOptional[int] """ diff --git a/python/databricks/bundles/jobs/_models/azure_attributes.py b/python/databricks/bundles/jobs/_models/azure_attributes.py index f82a0698aa..0e1c237902 100644 --- a/python/databricks/bundles/jobs/_models/azure_attributes.py +++ b/python/databricks/bundles/jobs/_models/azure_attributes.py @@ -24,6 +24,11 @@ class AzureAttributes: """ availability: VariableOrOptional[AzureAvailability] = None + """ + Availability type used for all subsequent nodes past the `first_on_demand` ones. + Note: If `first_on_demand` is zero, this availability + type will be used for the entire cluster. + """ first_on_demand: VariableOrOptional[int] = None """ @@ -61,6 +66,11 @@ class AzureAttributesDict(TypedDict, total=False): """""" availability: VariableOrOptional[AzureAvailabilityParam] + """ + Availability type used for all subsequent nodes past the `first_on_demand` ones. + Note: If `first_on_demand` is zero, this availability + type will be used for the entire cluster. 
+ """ first_on_demand: VariableOrOptional[int] """ diff --git a/python/databricks/bundles/jobs/_models/cluster_spec.py b/python/databricks/bundles/jobs/_models/cluster_spec.py index a2662c5ca8..2a47935b84 100644 --- a/python/databricks/bundles/jobs/_models/cluster_spec.py +++ b/python/databricks/bundles/jobs/_models/cluster_spec.py @@ -119,8 +119,33 @@ class ClusterSpec: """ data_security_mode: VariableOrOptional[DataSecurityMode] = None + """ + Data security mode decides what data governance model to use when accessing data + from a cluster. + + The following modes can only be used when `kind = CLASSIC_PREVIEW`. + * `DATA_SECURITY_MODE_AUTO`: Databricks will choose the most appropriate access mode depending on your compute configuration. + * `DATA_SECURITY_MODE_STANDARD`: Alias for `USER_ISOLATION`. + * `DATA_SECURITY_MODE_DEDICATED`: Alias for `SINGLE_USER`. + + The following modes can be used regardless of `kind`. + * `NONE`: No security isolation for multiple users sharing the cluster. Data governance features are not available in this mode. + * `SINGLE_USER`: A secure cluster that can only be exclusively used by a single user specified in `single_user_name`. Most programming languages, cluster features and data governance features are available in this mode. + * `USER_ISOLATION`: A secure cluster that can be shared by multiple users. Cluster users are fully isolated so that they cannot see each other's data and credentials. Most data governance features are supported in this mode. But programming languages and cluster features might be limited. + + The following modes are deprecated starting with Databricks Runtime 15.0 and + will be removed for future Databricks Runtime versions: + + * `LEGACY_TABLE_ACL`: This mode is for users migrating from legacy Table ACL clusters. + * `LEGACY_PASSTHROUGH`: This mode is for users migrating from legacy Passthrough on high concurrency clusters. 
+ * `LEGACY_SINGLE_USER`: This mode is for users migrating from legacy Passthrough on standard clusters. + * `LEGACY_SINGLE_USER_STANDARD`: This mode provides a way that doesn’t have UC nor passthrough enabled. + """ docker_image: VariableOrOptional[DockerImage] = None + """ + Custom docker image BYOC + """ driver_instance_pool_id: VariableOrOptional[str] = None """ @@ -181,6 +206,18 @@ class ClusterSpec: """ kind: VariableOrOptional[Kind] = None + """ + The kind of compute described by this compute specification. + + Depending on `kind`, different validations and default values will be applied. + + Clusters with `kind = CLASSIC_PREVIEW` support the following fields, whereas clusters with no specified `kind` do not. + * [is_single_node](/api/workspace/clusters/create#is_single_node) + * [use_ml_runtime](/api/workspace/clusters/create#use_ml_runtime) + * [data_security_mode](/api/workspace/clusters/create#data_security_mode) set to `DATA_SECURITY_MODE_AUTO`, `DATA_SECURITY_MODE_DEDICATED`, or `DATA_SECURITY_MODE_STANDARD` + + By using the [simple form](https://docs.databricks.com/compute/simple-form.html), your clusters are automatically using `kind = CLASSIC_PREVIEW`. + """ node_type_id: VariableOrOptional[str] = None """ @@ -213,6 +250,15 @@ class ClusterSpec: """ runtime_engine: VariableOrOptional[RuntimeEngine] = None + """ + Determines the cluster's runtime engine, either standard or Photon. + + This field is not compatible with legacy `spark_version` values that contain `-photon-`. + Remove `-photon-` from the `spark_version` and set `runtime_engine` to `PHOTON`. + + If left unspecified, the runtime engine defaults to standard unless the spark_version + contains -photon-, in which case Photon will be used. + """ single_user_name: VariableOrOptional[str] = None """ @@ -273,6 +319,9 @@ class ClusterSpec: """ workload_type: VariableOrOptional[WorkloadType] = None + """ + Cluster Attributes showing for clusters workload types. 
+ """ @classmethod def from_dict(cls, value: "ClusterSpecDict") -> "Self": @@ -343,8 +392,33 @@ class ClusterSpecDict(TypedDict, total=False): """ data_security_mode: VariableOrOptional[DataSecurityModeParam] + """ + Data security mode decides what data governance model to use when accessing data + from a cluster. + + The following modes can only be used when `kind = CLASSIC_PREVIEW`. + * `DATA_SECURITY_MODE_AUTO`: Databricks will choose the most appropriate access mode depending on your compute configuration. + * `DATA_SECURITY_MODE_STANDARD`: Alias for `USER_ISOLATION`. + * `DATA_SECURITY_MODE_DEDICATED`: Alias for `SINGLE_USER`. + + The following modes can be used regardless of `kind`. + * `NONE`: No security isolation for multiple users sharing the cluster. Data governance features are not available in this mode. + * `SINGLE_USER`: A secure cluster that can only be exclusively used by a single user specified in `single_user_name`. Most programming languages, cluster features and data governance features are available in this mode. + * `USER_ISOLATION`: A secure cluster that can be shared by multiple users. Cluster users are fully isolated so that they cannot see each other's data and credentials. Most data governance features are supported in this mode. But programming languages and cluster features might be limited. + + The following modes are deprecated starting with Databricks Runtime 15.0 and + will be removed for future Databricks Runtime versions: + + * `LEGACY_TABLE_ACL`: This mode is for users migrating from legacy Table ACL clusters. + * `LEGACY_PASSTHROUGH`: This mode is for users migrating from legacy Passthrough on high concurrency clusters. + * `LEGACY_SINGLE_USER`: This mode is for users migrating from legacy Passthrough on standard clusters. + * `LEGACY_SINGLE_USER_STANDARD`: This mode provides a way that doesn’t have UC nor passthrough enabled. 
+ """ docker_image: VariableOrOptional[DockerImageParam] + """ + Custom docker image BYOC + """ driver_instance_pool_id: VariableOrOptional[str] """ @@ -405,6 +479,18 @@ class ClusterSpecDict(TypedDict, total=False): """ kind: VariableOrOptional[KindParam] + """ + The kind of compute described by this compute specification. + + Depending on `kind`, different validations and default values will be applied. + + Clusters with `kind = CLASSIC_PREVIEW` support the following fields, whereas clusters with no specified `kind` do not. + * [is_single_node](/api/workspace/clusters/create#is_single_node) + * [use_ml_runtime](/api/workspace/clusters/create#use_ml_runtime) + * [data_security_mode](/api/workspace/clusters/create#data_security_mode) set to `DATA_SECURITY_MODE_AUTO`, `DATA_SECURITY_MODE_DEDICATED`, or `DATA_SECURITY_MODE_STANDARD` + + By using the [simple form](https://docs.databricks.com/compute/simple-form.html), your clusters are automatically using `kind = CLASSIC_PREVIEW`. + """ node_type_id: VariableOrOptional[str] """ @@ -437,6 +523,15 @@ class ClusterSpecDict(TypedDict, total=False): """ runtime_engine: VariableOrOptional[RuntimeEngineParam] + """ + Determines the cluster's runtime engine, either standard or Photon. + + This field is not compatible with legacy `spark_version` values that contain `-photon-`. + Remove `-photon-` from the `spark_version` and set `runtime_engine` to `PHOTON`. + + If left unspecified, the runtime engine defaults to standard unless the spark_version + contains -photon-, in which case Photon will be used. + """ single_user_name: VariableOrOptional[str] """ @@ -497,6 +592,9 @@ class ClusterSpecDict(TypedDict, total=False): """ workload_type: VariableOrOptional[WorkloadTypeParam] + """ + Cluster Attributes showing for clusters workload types. 
+ """ ClusterSpecParam = ClusterSpecDict | ClusterSpec diff --git a/python/databricks/bundles/jobs/_models/dashboard_task.py b/python/databricks/bundles/jobs/_models/dashboard_task.py index 4f9cd829a1..f838c10df7 100644 --- a/python/databricks/bundles/jobs/_models/dashboard_task.py +++ b/python/databricks/bundles/jobs/_models/dashboard_task.py @@ -17,6 +17,9 @@ class DashboardTask: """ dashboard_id: VariableOrOptional[str] = None + """ + The identifier of the dashboard to refresh. + """ filters: VariableOrDict[str] = field(default_factory=dict) """ @@ -31,6 +34,9 @@ class DashboardTask: """ subscription: VariableOrOptional[Subscription] = None + """ + Optional: subscription configuration for sending the dashboard snapshot. + """ warehouse_id: VariableOrOptional[str] = None """ @@ -50,6 +56,9 @@ class DashboardTaskDict(TypedDict, total=False): """""" dashboard_id: VariableOrOptional[str] + """ + The identifier of the dashboard to refresh. + """ filters: VariableOrDict[str] """ @@ -64,6 +73,9 @@ class DashboardTaskDict(TypedDict, total=False): """ subscription: VariableOrOptional[SubscriptionParam] + """ + Optional: subscription configuration for sending the dashboard snapshot. 
+ """ warehouse_id: VariableOrOptional[str] """ diff --git a/python/databricks/bundles/jobs/_models/docker_image.py b/python/databricks/bundles/jobs/_models/docker_image.py index 3a76b2bbf3..bce0a35989 100644 --- a/python/databricks/bundles/jobs/_models/docker_image.py +++ b/python/databricks/bundles/jobs/_models/docker_image.py @@ -18,6 +18,9 @@ class DockerImage: """""" basic_auth: VariableOrOptional[DockerBasicAuth] = None + """ + Basic auth with username and password + """ url: VariableOrOptional[str] = None """ @@ -36,6 +39,9 @@ class DockerImageDict(TypedDict, total=False): """""" basic_auth: VariableOrOptional[DockerBasicAuthParam] + """ + Basic auth with username and password + """ url: VariableOrOptional[str] """ diff --git a/python/databricks/bundles/jobs/_models/environment.py b/python/databricks/bundles/jobs/_models/environment.py index 3bf806c7f4..f33018e7c5 100644 --- a/python/databricks/bundles/jobs/_models/environment.py +++ b/python/databricks/bundles/jobs/_models/environment.py @@ -40,6 +40,9 @@ class Environment: """ java_dependencies: VariableOrList[str] = field(default_factory=list) + """ + List of java dependencies. Each dependency is a string representing a java library path. For example: `/Volumes/path/to/test.jar`. + """ @classmethod def from_dict(cls, value: "EnvironmentDict") -> "Self": @@ -76,6 +79,9 @@ class EnvironmentDict(TypedDict, total=False): """ java_dependencies: VariableOrList[str] + """ + List of java dependencies. Each dependency is a string representing a java library path. For example: `/Volumes/path/to/test.jar`. 
+ """ EnvironmentParam = EnvironmentDict | Environment diff --git a/python/databricks/bundles/jobs/_models/gcp_attributes.py b/python/databricks/bundles/jobs/_models/gcp_attributes.py index 6f390c1cf8..c917108597 100644 --- a/python/databricks/bundles/jobs/_models/gcp_attributes.py +++ b/python/databricks/bundles/jobs/_models/gcp_attributes.py @@ -20,6 +20,10 @@ class GcpAttributes: """ availability: VariableOrOptional[GcpAvailability] = None + """ + This field determines whether the spark executors will be scheduled to run on preemptible + VMs, on-demand VMs, or preemptible VMs with a fallback to on-demand VMs if the former is unavailable. + """ boot_disk_size: VariableOrOptional[int] = None """ @@ -82,6 +86,10 @@ class GcpAttributesDict(TypedDict, total=False): """""" availability: VariableOrOptional[GcpAvailabilityParam] + """ + This field determines whether the spark executors will be scheduled to run on preemptible + VMs, on-demand VMs, or preemptible VMs with a fallback to on-demand VMs if the former is unavailable. + """ boot_disk_size: VariableOrOptional[int] """ diff --git a/python/databricks/bundles/jobs/_models/job.py b/python/databricks/bundles/jobs/_models/job.py index e836d4c9a8..53840951d0 100644 --- a/python/databricks/bundles/jobs/_models/job.py +++ b/python/databricks/bundles/jobs/_models/job.py @@ -110,6 +110,9 @@ class Job(Resource): """ health: VariableOrOptional[JobsHealthRules] = None + """ + An optional set of health rules that can be defined for this job. + """ job_clusters: VariableOrList[JobCluster] = field(default_factory=list) """ @@ -163,6 +166,11 @@ class Job(Resource): """ run_as: VariableOrOptional[JobRunAs] = None + """ + The user or service principal that the job runs as, if specified in the request. + This field indicates the explicit configuration of `run_as` for the job. + To find the value in all cases, explicit or implicit, use `run_as_user_name`. 
+ """ schedule: VariableOrOptional[CronSchedule] = None """ @@ -255,6 +263,9 @@ class JobDict(TypedDict, total=False): """ health: VariableOrOptional[JobsHealthRulesParam] + """ + An optional set of health rules that can be defined for this job. + """ job_clusters: VariableOrList[JobClusterParam] """ @@ -308,6 +319,11 @@ class JobDict(TypedDict, total=False): """ run_as: VariableOrOptional[JobRunAsParam] + """ + The user or service principal that the job runs as, if specified in the request. + This field indicates the explicit configuration of `run_as` for the job. + To find the value in all cases, explicit or implicit, use `run_as_user_name`. + """ schedule: VariableOrOptional[CronScheduleParam] """ diff --git a/python/databricks/bundles/jobs/_models/job_environment.py b/python/databricks/bundles/jobs/_models/job_environment.py index 798685f4c6..6aa2153763 100644 --- a/python/databricks/bundles/jobs/_models/job_environment.py +++ b/python/databricks/bundles/jobs/_models/job_environment.py @@ -20,6 +20,10 @@ class JobEnvironment: """ spec: VariableOrOptional[Environment] = None + """ + The environment entity used to preserve serverless environment side panel, jobs' environment for non-notebook task, and DLT's environment for classic and serverless pipelines. + In this minimal environment spec, only pip dependencies are supported. + """ @classmethod def from_dict(cls, value: "JobEnvironmentDict") -> "Self": @@ -38,6 +42,10 @@ class JobEnvironmentDict(TypedDict, total=False): """ spec: VariableOrOptional[EnvironmentParam] + """ + The environment entity used to preserve serverless environment side panel, jobs' environment for non-notebook task, and DLT's environment for classic and serverless pipelines. + In this minimal environment spec, only pip dependencies are supported. 
+ """ JobEnvironmentParam = JobEnvironmentDict | JobEnvironment diff --git a/python/databricks/bundles/jobs/_models/jobs_health_rule.py b/python/databricks/bundles/jobs/_models/jobs_health_rule.py index 149265c86a..0be82576ea 100644 --- a/python/databricks/bundles/jobs/_models/jobs_health_rule.py +++ b/python/databricks/bundles/jobs/_models/jobs_health_rule.py @@ -22,8 +22,20 @@ class JobsHealthRule: """""" metric: VariableOr[JobsHealthMetric] + """ + Specifies the health metric that is being evaluated for a particular health rule. + + * `RUN_DURATION_SECONDS`: Expected total time for a run in seconds. + * `STREAMING_BACKLOG_BYTES`: An estimate of the maximum bytes of data waiting to be consumed across all streams. This metric is in Public Preview. + * `STREAMING_BACKLOG_RECORDS`: An estimate of the maximum offset lag across all streams. This metric is in Public Preview. + * `STREAMING_BACKLOG_SECONDS`: An estimate of the maximum consumer delay across all streams. This metric is in Public Preview. + * `STREAMING_BACKLOG_FILES`: An estimate of the maximum number of outstanding files across all streams. This metric is in Public Preview. + """ op: VariableOr[JobsHealthOperator] + """ + Specifies the operator used to compare the health metric value with the specified threshold. + """ value: VariableOr[int] """ @@ -42,8 +54,20 @@ class JobsHealthRuleDict(TypedDict, total=False): """""" metric: VariableOr[JobsHealthMetricParam] + """ + Specifies the health metric that is being evaluated for a particular health rule. + + * `RUN_DURATION_SECONDS`: Expected total time for a run in seconds. + * `STREAMING_BACKLOG_BYTES`: An estimate of the maximum bytes of data waiting to be consumed across all streams. This metric is in Public Preview. + * `STREAMING_BACKLOG_RECORDS`: An estimate of the maximum offset lag across all streams. This metric is in Public Preview. + * `STREAMING_BACKLOG_SECONDS`: An estimate of the maximum consumer delay across all streams. 
This metric is in Public Preview. + * `STREAMING_BACKLOG_FILES`: An estimate of the maximum number of outstanding files across all streams. This metric is in Public Preview. + """ op: VariableOr[JobsHealthOperatorParam] + """ + Specifies the operator used to compare the health metric value with the specified threshold. + """ value: VariableOr[int] """ diff --git a/python/databricks/bundles/jobs/_models/subscription.py b/python/databricks/bundles/jobs/_models/subscription.py index 2d63990245..5636e02d31 100644 --- a/python/databricks/bundles/jobs/_models/subscription.py +++ b/python/databricks/bundles/jobs/_models/subscription.py @@ -29,6 +29,9 @@ class Subscription: """ subscribers: VariableOrList[SubscriptionSubscriber] = field(default_factory=list) + """ + The list of subscribers to send the snapshot of the dashboard to. + """ @classmethod def from_dict(cls, value: "SubscriptionDict") -> "Self": @@ -53,6 +56,9 @@ class SubscriptionDict(TypedDict, total=False): """ subscribers: VariableOrList[SubscriptionSubscriberParam] + """ + The list of subscribers to send the snapshot of the dashboard to. + """ SubscriptionParam = SubscriptionDict | Subscription diff --git a/python/databricks/bundles/jobs/_models/subscription_subscriber.py b/python/databricks/bundles/jobs/_models/subscription_subscriber.py index eddc8fc7aa..bb45559505 100644 --- a/python/databricks/bundles/jobs/_models/subscription_subscriber.py +++ b/python/databricks/bundles/jobs/_models/subscription_subscriber.py @@ -14,8 +14,14 @@ class SubscriptionSubscriber: """""" destination_id: VariableOrOptional[str] = None + """ + A snapshot of the dashboard will be sent to the destination when the `destination_id` field is present. + """ user_name: VariableOrOptional[str] = None + """ + A snapshot of the dashboard will be sent to the user's email when the `user_name` field is present. 
+ """ @classmethod def from_dict(cls, value: "SubscriptionSubscriberDict") -> "Self": @@ -29,8 +35,14 @@ class SubscriptionSubscriberDict(TypedDict, total=False): """""" destination_id: VariableOrOptional[str] + """ + A snapshot of the dashboard will be sent to the destination when the `destination_id` field is present. + """ user_name: VariableOrOptional[str] + """ + A snapshot of the dashboard will be sent to the user's email when the `user_name` field is present. + """ SubscriptionSubscriberParam = SubscriptionSubscriberDict | SubscriptionSubscriber diff --git a/python/databricks/bundles/jobs/_models/task.py b/python/databricks/bundles/jobs/_models/task.py index 9e2c99517e..99cf7e7617 100644 --- a/python/databricks/bundles/jobs/_models/task.py +++ b/python/databricks/bundles/jobs/_models/task.py @@ -187,6 +187,9 @@ class Task: """ health: VariableOrOptional[JobsHealthRules] = None + """ + An optional set of health rules that can be defined for this job. + """ job_cluster_key: VariableOrOptional[str] = None """ @@ -394,6 +397,9 @@ class TaskDict(TypedDict, total=False): """ health: VariableOrOptional[JobsHealthRulesParam] + """ + An optional set of health rules that can be defined for this job. + """ job_cluster_key: VariableOrOptional[str] """ diff --git a/python/databricks/bundles/pipelines/_models/aws_attributes.py b/python/databricks/bundles/pipelines/_models/aws_attributes.py index 47bc0b97ce..96d999815d 100644 --- a/python/databricks/bundles/pipelines/_models/aws_attributes.py +++ b/python/databricks/bundles/pipelines/_models/aws_attributes.py @@ -24,6 +24,11 @@ class AwsAttributes: """ availability: VariableOrOptional[AwsAvailability] = None + """ + Availability type used for all subsequent nodes past the `first_on_demand` ones. + + Note: If `first_on_demand` is zero, this availability type will be used for the entire cluster. 
+ """ ebs_volume_count: VariableOrOptional[int] = None """ @@ -63,6 +68,9 @@ class AwsAttributes: """ ebs_volume_type: VariableOrOptional[EbsVolumeType] = None + """ + The type of EBS volumes that will be launched with this cluster. + """ first_on_demand: VariableOrOptional[int] = None """ @@ -124,6 +132,11 @@ class AwsAttributesDict(TypedDict, total=False): """""" availability: VariableOrOptional[AwsAvailabilityParam] + """ + Availability type used for all subsequent nodes past the `first_on_demand` ones. + + Note: If `first_on_demand` is zero, this availability type will be used for the entire cluster. + """ ebs_volume_count: VariableOrOptional[int] """ @@ -163,6 +176,9 @@ class AwsAttributesDict(TypedDict, total=False): """ ebs_volume_type: VariableOrOptional[EbsVolumeTypeParam] + """ + The type of EBS volumes that will be launched with this cluster. + """ first_on_demand: VariableOrOptional[int] """ diff --git a/python/databricks/bundles/pipelines/_models/azure_attributes.py b/python/databricks/bundles/pipelines/_models/azure_attributes.py index f06f84e9c9..42a481b193 100644 --- a/python/databricks/bundles/pipelines/_models/azure_attributes.py +++ b/python/databricks/bundles/pipelines/_models/azure_attributes.py @@ -24,6 +24,11 @@ class AzureAttributes: """ availability: VariableOrOptional[AzureAvailability] = None + """ + Availability type used for all subsequent nodes past the `first_on_demand` ones. + Note: If `first_on_demand` is zero, this availability + type will be used for the entire cluster. + """ first_on_demand: VariableOrOptional[int] = None """ @@ -61,6 +66,11 @@ class AzureAttributesDict(TypedDict, total=False): """""" availability: VariableOrOptional[AzureAvailabilityParam] + """ + Availability type used for all subsequent nodes past the `first_on_demand` ones. + Note: If `first_on_demand` is zero, this availability + type will be used for the entire cluster. 
+ """ first_on_demand: VariableOrOptional[int] """ diff --git a/python/databricks/bundles/pipelines/_models/gcp_attributes.py b/python/databricks/bundles/pipelines/_models/gcp_attributes.py index 1deae0c124..a379a9e7a8 100644 --- a/python/databricks/bundles/pipelines/_models/gcp_attributes.py +++ b/python/databricks/bundles/pipelines/_models/gcp_attributes.py @@ -20,6 +20,10 @@ class GcpAttributes: """ availability: VariableOrOptional[GcpAvailability] = None + """ + This field determines whether the spark executors will be scheduled to run on preemptible + VMs, on-demand VMs, or preemptible VMs with a fallback to on-demand VMs if the former is unavailable. + """ boot_disk_size: VariableOrOptional[int] = None """ @@ -82,6 +86,10 @@ class GcpAttributesDict(TypedDict, total=False): """""" availability: VariableOrOptional[GcpAvailabilityParam] + """ + This field determines whether the spark executors will be scheduled to run on preemptible + VMs, on-demand VMs, or preemptible VMs with a fallback to on-demand VMs if the former is unavailable. + """ boot_disk_size: VariableOrOptional[int] """ diff --git a/python/databricks/bundles/pipelines/_models/ingestion_pipeline_definition.py b/python/databricks/bundles/pipelines/_models/ingestion_pipeline_definition.py index 91af6cdb57..defa761a60 100644 --- a/python/databricks/bundles/pipelines/_models/ingestion_pipeline_definition.py +++ b/python/databricks/bundles/pipelines/_models/ingestion_pipeline_definition.py @@ -67,6 +67,10 @@ class IngestionPipelineDefinition: netsuite_jar_path: VariableOrOptional[str] = None """ :meta private: [EXPERIMENTAL] + + Netsuite only configuration. When the field is set for a netsuite connector, + the jar stored in the field will be validated and added to the classpath of + pipeline's cluster. 
""" objects: VariableOrList[IngestionConfig] = field(default_factory=list) @@ -133,6 +137,10 @@ class IngestionPipelineDefinitionDict(TypedDict, total=False): netsuite_jar_path: VariableOrOptional[str] """ :meta private: [EXPERIMENTAL] + + Netsuite only configuration. When the field is set for a netsuite connector, + the jar stored in the field will be validated and added to the classpath of + pipeline's cluster. """ objects: VariableOrList[IngestionConfigParam] diff --git a/python/databricks/bundles/pipelines/_models/pipeline.py b/python/databricks/bundles/pipelines/_models/pipeline.py index 2b4bbed23b..81dfa4d46f 100644 --- a/python/databricks/bundles/pipelines/_models/pipeline.py +++ b/python/databricks/bundles/pipelines/_models/pipeline.py @@ -182,6 +182,11 @@ class Pipeline(Resource): """ run_as: VariableOrOptional[RunAs] = None + """ + Write-only setting, available only in Create/Update calls. Specifies the user or service principal that the pipeline runs as. If not specified, the pipeline runs as the user who created the pipeline. + + Only `user_name` or `service_principal_name` can be specified. If both are specified, an error is thrown. + """ schema: VariableOrOptional[str] = None """ @@ -347,6 +352,11 @@ class PipelineDict(TypedDict, total=False): """ run_as: VariableOrOptional[RunAsParam] + """ + Write-only setting, available only in Create/Update calls. Specifies the user or service principal that the pipeline runs as. If not specified, the pipeline runs as the user who created the pipeline. + + Only `user_name` or `service_principal_name` can be specified. If both are specified, an error is thrown. 
+ """ schema: VariableOrOptional[str] """ diff --git a/python/databricks/bundles/pipelines/_models/table_specific_config.py b/python/databricks/bundles/pipelines/_models/table_specific_config.py index aa5ab57602..cccdd18610 100644 --- a/python/databricks/bundles/pipelines/_models/table_specific_config.py +++ b/python/databricks/bundles/pipelines/_models/table_specific_config.py @@ -108,6 +108,8 @@ class TableSpecificConfig: ] = None """ :meta private: [EXPERIMENTAL] + + (Optional) Additional custom parameters for Workday Report """ @classmethod @@ -200,6 +202,8 @@ class TableSpecificConfigDict(TypedDict, total=False): ] """ :meta private: [EXPERIMENTAL] + + (Optional) Additional custom parameters for Workday Report """ diff --git a/python/databricks/bundles/schemas/_models/privilege.py b/python/databricks/bundles/schemas/_models/privilege.py index 20f4e1f557..39ab7de33f 100644 --- a/python/databricks/bundles/schemas/_models/privilege.py +++ b/python/databricks/bundles/schemas/_models/privilege.py @@ -53,6 +53,12 @@ class Privilege(Enum): MODIFY_CLEAN_ROOM = "MODIFY_CLEAN_ROOM" EXECUTE_CLEAN_ROOM_TASK = "EXECUTE_CLEAN_ROOM_TASK" EXTERNAL_USE_SCHEMA = "EXTERNAL_USE_SCHEMA" + VIEW_OBJECT = "VIEW_OBJECT" + MANAGE_GRANTS = "MANAGE_GRANTS" + INSERT = "INSERT" + UPDATE = "UPDATE" + DELETE = "DELETE" + VIEW_ADMIN_METADATA = "VIEW_ADMIN_METADATA" PrivilegeParam = ( @@ -107,6 +113,12 @@ class Privilege(Enum): "MODIFY_CLEAN_ROOM", "EXECUTE_CLEAN_ROOM_TASK", "EXTERNAL_USE_SCHEMA", + "VIEW_OBJECT", + "MANAGE_GRANTS", + "INSERT", + "UPDATE", + "DELETE", + "VIEW_ADMIN_METADATA", ] | Privilege ) diff --git a/python/databricks/bundles/schemas/_models/schema.py b/python/databricks/bundles/schemas/_models/schema.py index d12385e395..5dfa24d41b 100644 --- a/python/databricks/bundles/schemas/_models/schema.py +++ b/python/databricks/bundles/schemas/_models/schema.py @@ -47,6 +47,9 @@ class Schema(Resource): """ properties: VariableOrDict[str] = field(default_factory=dict) + """ + A map 
of key-value properties attached to the securable. + """ storage_root: VariableOrOptional[str] = None """ @@ -87,6 +90,9 @@ class SchemaDict(TypedDict, total=False): """ properties: VariableOrDict[str] + """ + A map of key-value properties attached to the securable. + """ storage_root: VariableOrOptional[str] """ diff --git a/python/databricks/bundles/volumes/_models/privilege.py b/python/databricks/bundles/volumes/_models/privilege.py index 20f4e1f557..39ab7de33f 100644 --- a/python/databricks/bundles/volumes/_models/privilege.py +++ b/python/databricks/bundles/volumes/_models/privilege.py @@ -53,6 +53,12 @@ class Privilege(Enum): MODIFY_CLEAN_ROOM = "MODIFY_CLEAN_ROOM" EXECUTE_CLEAN_ROOM_TASK = "EXECUTE_CLEAN_ROOM_TASK" EXTERNAL_USE_SCHEMA = "EXTERNAL_USE_SCHEMA" + VIEW_OBJECT = "VIEW_OBJECT" + MANAGE_GRANTS = "MANAGE_GRANTS" + INSERT = "INSERT" + UPDATE = "UPDATE" + DELETE = "DELETE" + VIEW_ADMIN_METADATA = "VIEW_ADMIN_METADATA" PrivilegeParam = ( @@ -107,6 +113,12 @@ class Privilege(Enum): "MODIFY_CLEAN_ROOM", "EXECUTE_CLEAN_ROOM_TASK", "EXTERNAL_USE_SCHEMA", + "VIEW_OBJECT", + "MANAGE_GRANTS", + "INSERT", + "UPDATE", + "DELETE", + "VIEW_ADMIN_METADATA", ] | Privilege ) diff --git a/python/databricks/bundles/volumes/_models/volume.py b/python/databricks/bundles/volumes/_models/volume.py index bf77c831d6..55c6e35016 100644 --- a/python/databricks/bundles/volumes/_models/volume.py +++ b/python/databricks/bundles/volumes/_models/volume.py @@ -57,6 +57,11 @@ class Volume(Resource): """ volume_type: VariableOrOptional[VolumeType] = None + """ + The type of the volume. An external volume is located in the specified external location. + A managed volume is located in the default location which is specified by the parent schema, or the parent catalog, or the Metastore. 
+ [Learn more](https://docs.databricks.com/aws/en/volumes/managed-vs-external) + """ @classmethod def from_dict(cls, value: "VolumeDict") -> "Self": @@ -102,6 +107,11 @@ class VolumeDict(TypedDict, total=False): """ volume_type: VariableOrOptional[VolumeTypeParam] + """ + The type of the volume. An external volume is located in the specified external location. + A managed volume is located in the default location which is specified by the parent schema, or the parent catalog, or the Metastore. + [Learn more](https://docs.databricks.com/aws/en/volumes/managed-vs-external) + """ VolumeParam = VolumeDict | Volume