-
Notifications
You must be signed in to change notification settings - Fork 4
Expand file tree
/
Copy pathLogAnalyzerRunner.scala
More file actions
36 lines (30 loc) · 1.22 KB
/
LogAnalyzerRunner.scala
File metadata and controls
36 lines (30 loc) · 1.22 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
package com.cloudwick.spark.loganalysis
import com.cloudwick.logging.LazyLogging
import org.apache.spark.{SparkContext, SparkConf}
/**
* Spark job to analyze apache http log events
* - Aggregates the global total number of times each status code (200, 404, 503, ...) has been
* encountered
* - Aggregates per minute hits received by the web server
* - Aggregates counts based on Country & City the request originated from using GeoLocation lookup
*
* @author ashrith
*/
object LogAnalyzerRunner extends LazyLogging {

  /**
   * Entry point of the job: reads the input as lines of text, runs the three `LogAnalyzer`
   * counters over it and writes each result as text files into its own sub-directory of the
   * output path (`status_counts`, `volume_counts`, `country_counts`).
   *
   * @param args exactly two elements, `input_path` and `output_path`; with any other number of
   *             arguments a usage message is printed to stderr and the JVM exits with status 1
   */
  def main(args: Array[String]): Unit = {
    if (args.length != 2) {
      System.err.println("Usage: LogAnalyzerRunner input_path output_path")
      System.exit(1)
    }

    val Array(inputPath, outputPath) = args

    val conf = new SparkConf().setAppName("LogAnalyzerRunner")
    val sc = new SparkContext(conf)

    // Always stop the context, even when one of the save calls fails, so the application
    // shuts down cleanly instead of leaving the SparkContext open.
    try {
      // NOTE(review): `lines` feeds three separate save calls below; consider persisting it
      // if re-reading the input for each one turns out to be expensive -- confirm by measuring.
      val lines = sc.textFile(inputPath)

      val statusCounts = LogAnalyzer.statusCounter(lines)
      val volumeCounter = LogAnalyzer.volumeCounter(lines)
      val countryCounter = LogAnalyzer.countryCounter(lines)

      statusCounts.saveAsTextFile(s"$outputPath/status_counts")
      volumeCounter.saveAsTextFile(s"$outputPath/volume_counts")
      countryCounter.saveAsTextFile(s"$outputPath/country_counts")
    } finally {
      sc.stop()
    }
  }
}