Skip to content

Commit b7c9297

Browse files
committed
jupyter api allows multiple integration files now
1 parent 2fb01c3 commit b7c9297

File tree

5 files changed

+182
-87
lines changed

5 files changed

+182
-87
lines changed
Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
/*-
2+
* =LICENSE=
3+
* Kotlin Spark API: API for Spark 3.2+ (Scala 2.12)
4+
* ----------
5+
* Copyright (C) 2019 - 2022 JetBrains
6+
* ----------
7+
* Licensed under the Apache License, Version 2.0 (the "License");
8+
* you may not use this file except in compliance with the License.
9+
* You may obtain a copy of the License at
10+
*
11+
* http://www.apache.org/licenses/LICENSE-2.0
12+
*
13+
* Unless required by applicable law or agreed to in writing, software
14+
* distributed under the License is distributed on an "AS IS" BASIS,
15+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16+
* See the License for the specific language governing permissions and
17+
* limitations under the License.
18+
* =LICENSEEND=
19+
*/
20+
package org.jetbrains.kotlinx.spark.api.jupyter
21+
22+
import org.apache.spark.api.java.JavaRDDLike
23+
import org.apache.spark.rdd.RDD
24+
import org.apache.spark.sql.Dataset
25+
import org.intellij.lang.annotations.Language
26+
import org.jetbrains.kotlinx.jupyter.api.HTML
27+
import org.jetbrains.kotlinx.jupyter.api.KotlinKernelHost
28+
import org.jetbrains.kotlinx.jupyter.api.libraries.JupyterIntegration
29+
30+
/**
 * Shared base for the Jupyter integrations of the Kotlin Spark API.
 *
 * Declares the Maven dependencies and default imports every variant needs,
 * registers HTML renderers for the common Spark result types, and delegates
 * variant-specific kernel initialization to [onLoaded], implemented by each
 * concrete integration (the producers listed in libraries.json).
 */
abstract class Integration : JupyterIntegration() {

    // Versions pinned to what this artifact was built against.
    private val kotlinVersion = "1.6.21"
    private val scalaCompatVersion = "2.12"
    private val scalaVersion = "2.12.15"
    private val spark3Version = "3.2.1"

    /** Variant-specific initialization, executed in the kernel after the common setup below. */
    abstract fun KotlinKernelHost.onLoaded()

    override fun Builder.onLoaded() {

        // Each artifact listed exactly once: spark-sql and spark-streaming were
        // previously duplicated; the resolver dedupes identical coordinates, so
        // dropping the repeats is behavior-preserving.
        dependencies(
            "org.apache.spark:spark-repl_$scalaCompatVersion:$spark3Version",
            "org.jetbrains.kotlin:kotlin-stdlib-jdk8:$kotlinVersion",
            "org.jetbrains.kotlin:kotlin-reflect:$kotlinVersion",
            "org.apache.spark:spark-sql_$scalaCompatVersion:$spark3Version",
            "org.apache.spark:spark-streaming_$scalaCompatVersion:$spark3Version",
            "org.apache.spark:spark-mllib_$scalaCompatVersion:$spark3Version",
            "org.apache.spark:spark-graphx_$scalaCompatVersion:$spark3Version",
            "org.apache.spark:spark-launcher_$scalaCompatVersion:$spark3Version",
            "org.apache.spark:spark-catalyst_$scalaCompatVersion:$spark3Version",
            "org.apache.spark:spark-core_$scalaCompatVersion:$spark3Version",
            "org.scala-lang:scala-library:$scalaVersion",
            "org.scala-lang.modules:scala-xml_$scalaCompatVersion:2.0.1",
            "org.scala-lang:scala-reflect:$scalaVersion",
            "org.scala-lang:scala-compiler:$scalaVersion",
            "commons-io:commons-io:2.11.0",
        )

        // Report the concrete subclass: the previous fixed "SparkIntegration loaded"
        // text was wrong when this base was loaded via SparkStreamingIntegration.
        println("${this@Integration::class.simpleName} loaded")

        import(
            "org.jetbrains.kotlinx.spark.api.*",
            "org.jetbrains.kotlinx.spark.api.tuples.*",
            *(1..22).map { "scala.Tuple$it" }.toTypedArray(),
            "org.apache.spark.sql.functions.*",
            "org.apache.spark.*",
            "org.apache.spark.sql.*",
            "org.apache.spark.api.java.*",
            "scala.collection.Seq",
            "org.apache.spark.rdd.*",
            "java.io.Serializable",
            "org.apache.spark.streaming.api.java.*",
            "org.apache.spark.streaming.api.*",
            "org.apache.spark.streaming.*",
        )

        // Run the variant-specific setup provided by the subclass.
        onLoaded {
            onLoaded()
        }

        // Render Dataset
        render<Dataset<*>> {
            HTML(it.toHtml())
        }

        render<RDD<*>> {
            HTML(it.toJavaRDD().toHtml())
        }

        render<JavaRDDLike<*, *>> {
            HTML(it.toHtml())
        }
    }
}

kotlin-spark-api/3.2/src/main/kotlin/org/jetbrains/kotlinx/spark/api/jupyter/SparkIntegration.kt

Lines changed: 22 additions & 86 deletions
Original file line numberDiff line numberDiff line change
@@ -33,122 +33,58 @@ import java.io.InputStreamReader
3333

3434

3535
import org.apache.spark.*
36+
import org.jetbrains.kotlinx.jupyter.api.KotlinKernelHost
3637
import scala.collection.*
3738
import org.jetbrains.kotlinx.spark.api.SparkSession
3839
import scala.Product
3940
import java.io.Serializable
4041
import scala.collection.Iterable as ScalaIterable
4142
import scala.collection.Iterator as ScalaIterator
4243

44+
/**
 * %use kotlin-spark-api
 */
@Suppress("UNUSED_VARIABLE", "LocalVariableName")
@OptIn(ExperimentalStdlibApi::class)
internal class SparkIntegration : Integration() {

    // NOTE(review): the pre-refactor version also registered an onShutdown hook
    // that executed `spark.stop()`; this commit drops it, so the Spark session
    // is no longer stopped when the notebook shuts down — confirm intended.
    override fun KotlinKernelHost.onLoaded() {
        // Make REPL-generated classes available to Spark executors.
        val _0 = execute("""%dumpClassesForSpark""")

        // Cells executed in the kernel right after loading: start a session,
        // expose `spark` and `sc` directly, and install Dataset/DataFrame
        // conversion helpers plus the `udf` registration shortcut.
        @Language("kts")
        val _1 = listOf(
            """
                val spark = org.jetbrains.kotlinx.spark.api.SparkSession
                    .builder()
                    .master(SparkConf().get("spark.master", "local[*]"))
                    .appName("Jupyter")
                    .config("spark.sql.codegen.wholeStage", false)
                    .config("spark.io.compression.codec", "snappy")
                    .getOrCreate()""".trimIndent(),
            """
                spark.sparkContext.setLogLevel(org.jetbrains.kotlinx.spark.api.SparkLogLevel.ERROR)""".trimIndent(),
            """
                val sc by lazy {
                    org.apache.spark.api.java.JavaSparkContext(spark.sparkContext)
                }""".trimIndent(),
            """
                println("Spark session has been started and is running. No `withSpark { }` necessary, you can access `spark` and `sc` directly. To use Spark streaming, use `%use kotlin-spark-api-streaming` instead.")""".trimIndent(),
            """
                inline fun <reified T> List<T>.toDS(): Dataset<T> = toDS(spark)""".trimIndent(),
            """
                inline fun <reified T> Array<T>.toDS(): Dataset<T> = spark.dsOf(*this)""".trimIndent(),
            """
                inline fun <reified T> dsOf(vararg arg: T): Dataset<T> = spark.dsOf(*arg)""".trimIndent(),
            """
                inline fun <reified T> RDD<T>.toDS(): Dataset<T> = toDS(spark)""".trimIndent(),
            """
                inline fun <reified T> JavaRDDLike<T, *>.toDS(): Dataset<T> = toDS(spark)""".trimIndent(),
            """
                inline fun <reified T> RDD<T>.toDF(): Dataset<Row> = toDF(spark)""".trimIndent(),
            """
                inline fun <reified T> JavaRDDLike<T, *>.toDF(): Dataset<Row> = toDF(spark)""".trimIndent(),
            """
                val udf: UDFRegistration get() = spark.udf()""".trimIndent(),
        ).map(::execute)
    }
}
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
/*-
2+
* =LICENSE=
3+
* Kotlin Spark API: API for Spark 3.2+ (Scala 2.12)
4+
* ----------
5+
* Copyright (C) 2019 - 2022 JetBrains
6+
* ----------
7+
* Licensed under the Apache License, Version 2.0 (the "License");
8+
* you may not use this file except in compliance with the License.
9+
* You may obtain a copy of the License at
10+
*
11+
* http://www.apache.org/licenses/LICENSE-2.0
12+
*
13+
* Unless required by applicable law or agreed to in writing, software
14+
* distributed under the License is distributed on an "AS IS" BASIS,
15+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16+
* See the License for the specific language governing permissions and
17+
* limitations under the License.
18+
* =LICENSEEND=
19+
*/
20+
package org.jetbrains.kotlinx.spark.api.jupyter
21+
22+
import kotlinx.html.*
23+
import kotlinx.html.stream.appendHTML
24+
import org.apache.spark.api.java.JavaRDDLike
25+
import org.apache.spark.rdd.RDD
26+
import org.apache.spark.sql.Dataset
27+
import org.apache.spark.unsafe.array.ByteArrayMethods
28+
import org.intellij.lang.annotations.Language
29+
import org.jetbrains.kotlinx.jupyter.api.HTML
30+
import org.jetbrains.kotlinx.jupyter.api.libraries.JupyterIntegration
31+
import org.jetbrains.kotlinx.spark.api.*
32+
import java.io.InputStreamReader
33+
34+
35+
import org.apache.spark.*
36+
import org.jetbrains.kotlinx.jupyter.api.KotlinKernelHost
37+
import scala.collection.*
38+
import org.jetbrains.kotlinx.spark.api.SparkSession
39+
import scala.Product
40+
import java.io.Serializable
41+
import scala.collection.Iterable as ScalaIterable
42+
import scala.collection.Iterator as ScalaIterator
43+
44+
/**
 * %use kotlin-spark-api-streaming
 */
@Suppress("UNUSED_VARIABLE", "LocalVariableName")
@OptIn(ExperimentalStdlibApi::class)
internal class SparkStreamingIntegration : Integration() {

    override fun KotlinKernelHost.onLoaded() {
        // Make REPL-generated classes available to Spark executors.
        execute("""%dumpClassesForSpark""")

        // Streaming variant starts no eager session; it only tells the user
        // how to begin one.
        @Language("kts")
        val welcomeMessage =
            """
                println("To start a spark streaming session, simply use `withSparkStreaming { }` inside a cell. To use Spark normally, use `withSpark { }` in a cell, or use `%use kotlin-spark-api` to start a Spark session for the whole notebook.")""".trimIndent()
        execute(welcomeMessage)
    }
}

kotlin-spark-api/3.2/src/main/resources/META-INF/kotlin-jupyter-libraries/libraries.json

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,9 @@
33
"producers": [
44
{
55
"fqn": "org.jetbrains.kotlinx.spark.api.jupyter.SparkIntegration"
6+
},
7+
{
8+
"fqn": "org.jetbrains.kotlinx.spark.api.jupyter.SparkStreamingIntegration"
69
}
710
]
811
}

pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
<kotest-extensions-allure.version>1.1.0</kotest-extensions-allure.version>
1818
<embedded-kafka.version>3.1.0</embedded-kafka.version>
1919
<spark3.version>3.2.1</spark3.version>
20-
<kotlin-jupyter-api.version>0.11.0-77</kotlin-jupyter-api.version>
20+
<kotlin-jupyter-api.version>0.11.0-79</kotlin-jupyter-api.version>
2121
<kotlinx.html.version>0.7.3</kotlinx.html.version>
2222
<hadoop.version>3.3.1</hadoop.version>
2323
<!-- <junit.version>5.8.2</junit.version>-->

0 commit comments

Comments
 (0)