-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathSaveToDatabase.scala
More file actions
31 lines (26 loc) · 1000 Bytes
/
SaveToDatabase.scala
File metadata and controls
31 lines (26 loc) · 1000 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
import org.apache.spark.{SparkConf, SparkContext}
import com.mongodb.spark._
import com.mongodb.spark.config._
import org.bson.Document
import scala.collection.JavaConverters._
/**
 * Spark job that reads an inverted-index text file and writes one MongoDB
 * document per input line.
 *
 * Each input line is split on `", "`; the fields are read positionally as
 * `word`, `docCount` (an integer) and then zero or more document ids.
 */
object SavedToDatabase {

  /**
   * Converts a single inverted-index line into a BSON [[Document]].
   *
   * @param line one line of the input file, with fields separated by `", "`
   * @return a document with the keys `word`, `docCount` and `docIds`
   * @throws ArrayIndexOutOfBoundsException if the line has fewer than two fields
   * @throws NumberFormatException if the second field is not an integer
   */
  private def toDocument(line: String): Document = {
    val parts = line.split(", ")
    val word = parts(0)
    val docCount = parts(1).toInt
    // Everything after the first two fields is a document id.
    val docIds = parts.drop(2).toSeq.asJava
    new Document("word", word)
      .append("docCount", docCount)
      .append("docIds", docIds)
  }

  /**
   * Entry point: builds the documents, prints the first ten as a preview and
   * saves all of them to MongoDB.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    println("Hey!")
    val conf = new SparkConf()
      .setAppName("Save Words to MongoDB")
      .setMaster("local[*]")
    val sc = new SparkContext(conf)
    try {
      val textFileRDD = sc.textFile("data/wholeInvertedIndex/part-00000")
      val documentsRDD = textFileRDD.map(toDocument)
      // take(10) fetches only the first ten records; collect() would pull
      // the entire dataset onto the driver just to print a preview.
      documentsRDD.take(10).foreach(println)
      documentsRDD.saveToMongoDB(WriteConfig(Map("uri" -> "mongodb://localhost:27017/BigData.dictionary")))
    } finally {
      // Always release the Spark context, even when the read or write fails.
      sc.stop()
    }
  }
}