Commit 54b9d10

updating readme and example
1 parent 4dfb747 commit 54b9d10

2 files changed: +52 -9 lines changed


README.md

Lines changed: 43 additions & 0 deletions
@@ -23,6 +23,7 @@ We have opened a Spark Project Improvement Proposal: [Kotlin support for Apache
 - [Column infix/operator functions](#column-infixoperator-functions)
 - [Overload Resolution Ambiguity](#overload-resolution-ambiguity)
 - [Tuples](#tuples)
+- [Streaming](#streaming)
 - [Examples](#examples)
 - [Reporting issues/Support](#reporting-issuessupport)
 - [Code of Conduct](#code-of-conduct)
@@ -267,6 +268,48 @@ Finally, all these tuple helper functions are also baked in:
 - `map`
 - `cast`

+### Streaming
+
+A popular Spark extension is [Spark Streaming](https://spark.apache.org/docs/latest/streaming-programming-guide.html).
+Of course, the Kotlin Spark API also introduces a more Kotlin-esque approach to writing your streaming programs.
+There are examples for use with a checkpoint, Kafka, and SQL in the [examples module](examples/src/main/kotlin/org/jetbrains/kotlinx/spark/examples/streaming).
+
+Here is a quick example:
+```kotlin
+// Automatically provides ssc: JavaStreamingContext, which starts and awaits termination or timeout
+withSparkStreaming(batchDuration = Durations.seconds(1), timeout = 10_000) { // this: KSparkStreamingSession
+
+    // create an input stream for, for instance, Netcat: `$ nc -lk 9999`
+    val lines: JavaReceiverInputDStream<String> = ssc.socketTextStream("localhost", 9999)
+
+    // split the input stream on spaces
+    val words: JavaDStream<String> = lines.flatMap { it.split(" ").iterator() }
+
+    // perform an action on each RDD formed in the stream
+    words.foreachRDD { rdd: JavaRDD<String>, _: Time ->
+
+        // to convert the JavaRDD to a Dataset, we need a Spark session using the RDD's context
+        withSpark(rdd) { // this: KSparkSession
+            val dataframe: Dataset<TestRow> = rdd.map { TestRow(word = it) }.toDS()
+            dataframe
+                .groupByKey { it.word }
+                .count()
+                .show()
+            // +-----+--------+
+            // |  key|count(1)|
+            // +-----+--------+
+            // |hello|       1|
+            // |   is|       1|
+            // |    a|       1|
+            // | this|       1|
+            // | test|       3|
+            // +-----+--------+
+        }
+    }
+}
+```
+

 ## Examples

 For more, check out the [examples](https://github.com/JetBrains/kotlin-spark-api/tree/master/examples/src/main/kotlin/org/jetbrains/kotlinx/spark/examples) module.
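Note that the README snippet above references a `TestRow` class that the section itself never defines; it lives in the example file changed below. Judging from its usage (`TestRow(word = it)` and `groupByKey { it.word }`), a minimal stand-in is a one-field data class. This is a sketch inferred from the diff, not the verbatim declaration from the repository:

```kotlin
// Minimal sketch of the row type the README snippet assumes:
// a single String field named `word`, which the example groups and counts on.
data class TestRow(
    val word: String,
)
```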

examples/src/main/kotlin/org/jetbrains/kotlinx/spark/examples/streaming/Streaming.kt

Lines changed: 9 additions & 9 deletions
@@ -19,8 +19,12 @@
  */
 package org.jetbrains.kotlinx.spark.examples.streaming

+import org.apache.spark.api.java.JavaRDD
 import org.apache.spark.sql.Dataset
 import org.apache.spark.streaming.Durations
+import org.apache.spark.streaming.Time
+import org.apache.spark.streaming.api.java.JavaDStream
+import org.apache.spark.streaming.api.java.JavaReceiverInputDStream
 import org.jetbrains.kotlinx.spark.api.*

 data class TestRow(
@@ -32,22 +36,18 @@ data class TestRow(
  *
  * `$ nc -lk 9999`
  */
-fun main() = withSparkStreaming(Durations.seconds(1), timeout = 10_000) {
+fun main() = withSparkStreaming(batchDuration = Durations.seconds(1), timeout = 10_000) { // this: KSparkStreamingSession

-    val lines = ssc.socketTextStream("localhost", 9999)
-    val words = lines.flatMap { it.split(" ").iterator() }
-
-    words.foreachRDD { rdd, _ ->
-        withSpark(rdd) {
+    val lines: JavaReceiverInputDStream<String> = ssc.socketTextStream("localhost", 9999)
+    val words: JavaDStream<String> = lines.flatMap { it.split(" ").iterator() }

+    words.foreachRDD { rdd: JavaRDD<String>, _: Time ->
+        withSpark(rdd) { // this: KSparkSession
             val dataframe: Dataset<TestRow> = rdd.map { TestRow(it) }.toDS()
-
             dataframe
                 .groupByKey { it.word }
                 .count()
                 .show()
         }
-
     }
-
 }

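Pieced together from the two hunks above, the example after this commit reads roughly as follows. Treat this as a sketch for orientation, not the verbatim file: the license header and KDoc are elided, and the body of `TestRow` is inferred from its usage in the diff:

```kotlin
package org.jetbrains.kotlinx.spark.examples.streaming

import org.apache.spark.api.java.JavaRDD
import org.apache.spark.sql.Dataset
import org.apache.spark.streaming.Durations
import org.apache.spark.streaming.Time
import org.apache.spark.streaming.api.java.JavaDStream
import org.apache.spark.streaming.api.java.JavaReceiverInputDStream
import org.jetbrains.kotlinx.spark.api.*

// Field inferred from `TestRow(it)` and `groupByKey { it.word }` in the diff.
data class TestRow(val word: String)

// Counts words received from a local Netcat session (`$ nc -lk 9999`),
// printing one count table per 1-second batch until the 10_000 ms timeout elapses.
fun main() = withSparkStreaming(batchDuration = Durations.seconds(1), timeout = 10_000) { // this: KSparkStreamingSession

    val lines: JavaReceiverInputDStream<String> = ssc.socketTextStream("localhost", 9999)
    val words: JavaDStream<String> = lines.flatMap { it.split(" ").iterator() }

    words.foreachRDD { rdd: JavaRDD<String>, _: Time ->
        withSpark(rdd) { // this: KSparkSession
            val dataframe: Dataset<TestRow> = rdd.map { TestRow(it) }.toDS()
            dataframe
                .groupByKey { it.word }
                .count()
                .show()
        }
    }
}
```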