From 9e08a89b2f271c34d857e16f370bc6ed7dd2acf4 Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Fri, 9 Aug 2024 15:46:17 +0200 Subject: [PATCH 1/2] Add example showing a job running an sbt-built Scala JAR --- knowledge_base/sbt-example/.gitignore | 4 +++ knowledge_base/sbt-example/README.md | 33 +++++++++++++++++++ knowledge_base/sbt-example/build.sbt | 10 ++++++ knowledge_base/sbt-example/databricks.yml | 28 ++++++++++++++++ .../sbt-example/project/build.properties | 1 + knowledge_base/sbt-example/resources/job.yml | 27 +++++++++++++++ .../src/main/scala/example/SparkApp.scala | 12 +++++++ 7 files changed, 115 insertions(+) create mode 100644 knowledge_base/sbt-example/.gitignore create mode 100644 knowledge_base/sbt-example/README.md create mode 100644 knowledge_base/sbt-example/build.sbt create mode 100644 knowledge_base/sbt-example/databricks.yml create mode 100644 knowledge_base/sbt-example/project/build.properties create mode 100644 knowledge_base/sbt-example/resources/job.yml create mode 100644 knowledge_base/sbt-example/src/main/scala/example/SparkApp.scala diff --git a/knowledge_base/sbt-example/.gitignore b/knowledge_base/sbt-example/.gitignore new file mode 100644 index 0000000..27fb80b --- /dev/null +++ b/knowledge_base/sbt-example/.gitignore @@ -0,0 +1,4 @@ +/.bsp/ +target/ +.databricks +.vscode diff --git a/knowledge_base/sbt-example/README.md b/knowledge_base/sbt-example/README.md new file mode 100644 index 0000000..363cef1 --- /dev/null +++ b/knowledge_base/sbt-example/README.md @@ -0,0 +1,33 @@ +# SBT example + +This example demonstrates how to build a Scala JAR with [sbt](https://www.scala-sbt.org/) and use it from a job. + +## Prerequisites + +* Databricks CLI v0.226.0 or above +* [sbt](https://www.scala-sbt.org/) v1.10.1 or above + +## Usage + +Update the `host` field under `workspace` in `databricks.yml` to the Databricks workspace you wish to deploy to. 
+ +Update the `artifact_path` field under `workspace` in `databricks.yml` to the Unity Catalog Volume path where the JAR artifact needs to be deployed. + +Run `databricks bundle deploy` to deploy the job. + +Run `databricks bundle run example_job` to run the job. + +Example output: + +``` +% databricks bundle run example_job +Run URL: https://... + +2024-08-09 15:49:17 "Example running a Scala JAR built with sbt" TERMINATED SUCCESS ++-----+ +| word| ++-----+ +|Hello| +|World| ++-----+ +``` diff --git a/knowledge_base/sbt-example/build.sbt b/knowledge_base/sbt-example/build.sbt new file mode 100644 index 0000000..69ca324 --- /dev/null +++ b/knowledge_base/sbt-example/build.sbt @@ -0,0 +1,10 @@ +name := "sbt-example" + +version := "0.1.0-SNAPSHOT" + +scalaVersion := "2.12.19" + +libraryDependencies ++= Seq( + "org.apache.spark" %% "spark-core" % "3.5.0", + "org.apache.spark" %% "spark-sql" % "3.5.0" +) diff --git a/knowledge_base/sbt-example/databricks.yml b/knowledge_base/sbt-example/databricks.yml new file mode 100644 index 0000000..1ee830f --- /dev/null +++ b/knowledge_base/sbt-example/databricks.yml @@ -0,0 +1,28 @@ +bundle: + name: sbt_example + +include: + - ./resources/job.yml + +workspace: + host: https://myworkspace.cloud.databricks.com + + # JARs must be stored in a Unity Catalog Volume. + # Uncomment the line below and replace the path with the path to your Unity Catalog Volume. 
+ # + # artifact_path: /Volumes/my_catalog/my_schema/my_volume/some_path + +artifacts: + sbt_example: + type: jar + build: sbt package + files: + - source: ./target/scala-2.12/sbt-example*.jar + +permissions: + - group_name: users + level: CAN_VIEW + +targets: + dev: + default: true diff --git a/knowledge_base/sbt-example/project/build.properties b/knowledge_base/sbt-example/project/build.properties new file mode 100644 index 0000000..ee4c672 --- /dev/null +++ b/knowledge_base/sbt-example/project/build.properties @@ -0,0 +1 @@ +sbt.version=1.10.1 diff --git a/knowledge_base/sbt-example/resources/job.yml b/knowledge_base/sbt-example/resources/job.yml new file mode 100644 index 0000000..c59966c --- /dev/null +++ b/knowledge_base/sbt-example/resources/job.yml @@ -0,0 +1,27 @@ +resources: + jobs: + example_job: + name: "Example running a Scala JAR built with sbt" + + tasks: + - task_key: task + + spark_jar_task: + main_class_name: SparkApp + + libraries: + - jar: ../target/scala-2.12/sbt-example*.jar + + new_cluster: + node_type_id: i3.xlarge + spark_version: 15.4.x-scala2.12 + num_workers: 0 + spark_conf: + "spark.databricks.cluster.profile": "singleNode" + "spark.master": "local[*, 4]" + custom_tags: + "ResourceClass": "SingleNode" + + # The cluster must run in single user isolation mode. + # This means it is compatible with Unity Catalog and can access Unity Catalog Volumes. 
+ data_security_mode: SINGLE_USER diff --git a/knowledge_base/sbt-example/src/main/scala/example/SparkApp.scala b/knowledge_base/sbt-example/src/main/scala/example/SparkApp.scala new file mode 100644 index 0000000..d589888 --- /dev/null +++ b/knowledge_base/sbt-example/src/main/scala/example/SparkApp.scala @@ -0,0 +1,12 @@ +import org.apache.spark.sql.SparkSession + +object SparkApp { + def main(args: Array[String]): Unit = { + val spark = SparkSession.builder().getOrCreate() + + import spark.implicits._ + + val data = Seq("Hello", "World").toDF("word") + data.show() + } +} From d39242f2cc9d3634ca671ef5c063ee9bd168e180 Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Fri, 9 Aug 2024 16:50:20 +0200 Subject: [PATCH 2/2] Update README.md --- knowledge_base/sbt-example/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/knowledge_base/sbt-example/README.md b/knowledge_base/sbt-example/README.md index 363cef1..eb5452d 100644 --- a/knowledge_base/sbt-example/README.md +++ b/knowledge_base/sbt-example/README.md @@ -1,4 +1,4 @@ -# SBT example +# sbt example This example demonstrates how to build a Scala JAR with [sbt](https://www.scala-sbt.org/) and use it from a job.