-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathHTMLReader.java~
More file actions
95 lines (80 loc) · 2.91 KB
/
HTMLReader.java~
File metadata and controls
95 lines (80 loc) · 2.91 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileWriter;
import java.io.IOException;
import java.util.Arrays;
import java.util.NoSuchElementException;
import java.util.Scanner;
/**
 * Extracts review records from every file in a directory and writes them as
 * tab-separated rows to a single output file.
 *
 * <p>Each input file is scanned line by line. Everything up to and including the
 * first line containing {@code ,"review": [} is skipped. After that, every line
 * containing {@code "@type": "} is treated as the start of a review record whose
 * fields sit at fixed line offsets below it (see {@link #readReviewRow}). At most
 * {@value #MAX_REVIEWS_PER_FILE} records are taken from each file.
 *
 * <p>Usage: {@code java HTMLReader [inputDirectory [outputFile]]}. Both arguments
 * are optional and default to the locations that were previously hard-coded.
 */
public class HTMLReader {

    /** Directory scanned when no input directory is given on the command line. */
    private static final String DEFAULT_INPUT_DIR =
            "/Users/noellependergraft/Documents/Research/AAA/html";

    /** Output file used when none is given on the command line. */
    private static final String DEFAULT_OUTPUT_FILE = "AAA.csv";

    /** Text that marks the line after which review records begin. */
    private static final String REVIEW_SECTION_MARKER = ",\"review\": [";

    /** Text that marks the first line of a review record. */
    private static final String TYPE_MARKER = "\"@type\": \"";

    /** Upper bound on the number of rows written per input file. */
    private static final int MAX_REVIEWS_PER_FILE = 10;

    /**
     * Reads every regular file in the input directory and writes all extracted
     * rows to one output file.
     *
     * <p>The output file is opened once for the whole run, so rows from all input
     * files are kept (opening it per input file would truncate it each time and
     * keep only the last file's rows). Files are processed in sorted path order so
     * the output is deterministic. Problems are reported on {@code System.err};
     * a file that cannot be opened or does not follow the expected layout is
     * skipped and processing continues with the next file.
     *
     * @param args optional: {@code args[0]} is the input directory and
     *             {@code args[1]} is the output file; missing values fall back to
     *             the defaults
     */
    public static void main(String[] args) {
        int argCount = (args == null) ? 0 : args.length;
        File inputDir = new File(argCount > 0 ? args[0] : DEFAULT_INPUT_DIR);
        String outputPath = argCount > 1 ? args[1] : DEFAULT_OUTPUT_FILE;

        // listFiles() returns null when the path is not a directory or cannot be read.
        File[] files = inputDir.listFiles();
        if (files == null) {
            System.err.println("Cannot list input directory: " + inputDir);
            return;
        }
        Arrays.sort(files);

        try (BufferedWriter output = new BufferedWriter(new FileWriter(outputPath))) {
            for (File file : files) {
                if (!file.isFile()) {
                    continue;
                }
                try {
                    extractReviews(file, output);
                } catch (FileNotFoundException e) {
                    System.err.println("Cannot open " + file + ": " + e.getMessage());
                } catch (NoSuchElementException e) {
                    // The file ended, or a line lacked an expected field, in the middle of a record.
                    System.err.println("Unexpected review layout in " + file
                            + "; skipping the rest of this file.");
                }
            }
        } catch (IOException e) {
            System.err.println("Failed writing " + outputPath + ": " + e.getMessage());
        }
    }

    /**
     * Writes one row per review record found in {@code file}, up to
     * {@value #MAX_REVIEWS_PER_FILE} rows.
     *
     * @param file   input file to scan
     * @param output destination for the extracted rows
     * @throws FileNotFoundException  if {@code file} cannot be opened
     * @throws IOException            if writing to {@code output} fails
     * @throws NoSuchElementException if a record is incomplete
     */
    private static void extractReviews(File file, BufferedWriter output) throws IOException {
        try (Scanner scan = new Scanner(file)) {
            if (!skipToReviewSection(scan)) {
                return;
            }
            int written = 0;
            while (written < MAX_REVIEWS_PER_FILE && scan.hasNextLine()) {
                String line = scan.nextLine();
                if (!line.contains(TYPE_MARKER)) {
                    continue;
                }
                // The row is built completely before it is written, so a malformed
                // record never leaves a partial row in the output.
                output.write(readReviewRow(line, scan));
                written++;
            }
        }
    }

    /**
     * Consumes lines up to and including the first one containing the review
     * section marker.
     *
     * @return {@code true} if the marker was found, {@code false} if the input
     *         ended first (including an empty file)
     */
    private static boolean skipToReviewSection(Scanner scan) {
        while (scan.hasNextLine()) {
            if (scan.nextLine().contains(REVIEW_SECTION_MARKER)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Builds one tab-separated, newline-terminated row from a review record.
     *
     * <p>Lines are consumed from {@code scan} in this fixed order after
     * {@code typeLine}: the {@code "datePublished"} line (only the text before the
     * first {@code -} is kept), the {@code "reviewBody"} line, one ignored line,
     * a second {@code "@type"} line, the {@code "name"} line (three fields, split
     * on {@code " of "} and {@code ", "} - presumably name, city and state; verify
     * against the data), four ignored lines, and the {@code "ratingValue"} line.
     *
     * @param typeLine the already-read line containing the record's {@code "@type"}
     * @param scan     scanner positioned on the line after {@code typeLine}
     * @return the row text, ending in {@code \n}
     * @throws NoSuchElementException if the input ends early or a line does not
     *                                contain the expected fields
     */
    private static String readReviewRow(String typeLine, Scanner scan) {
        StringBuilder row = new StringBuilder();
        appendFields(row, typeLine, "\"@type\": \"|\",\\s*", 1);
        appendFields(row, scan.nextLine(), "\"datePublished\": \"|\",|-", 1);
        appendFields(row, scan.nextLine(), "\"reviewBody\": \"|\",", 1);
        skipLines(scan, 1);
        appendFields(row, scan.nextLine(), "\"@type\": \"|\",", 1);
        appendFields(row, scan.nextLine(), "\"name\":\"|\"| of |, ", 3);
        skipLines(scan, 4);
        appendFields(row, scan.nextLine(), "\"ratingValue\": \"|\",", 1);
        // Every field was appended with a trailing tab; the last one ends the row instead.
        row.setCharAt(row.length() - 1, '\n');
        return row.toString();
    }

    /**
     * Splits {@code line} on {@code delimiter}, discards the first token (the text
     * in front of the key) and appends the next {@code count} tokens to
     * {@code row}, each followed by a tab.
     *
     * @throws NoSuchElementException if the line has fewer than {@code count + 1} tokens
     */
    private static void appendFields(StringBuilder row, String line, String delimiter, int count) {
        try (Scanner fields = new Scanner(line).useDelimiter(delimiter)) {
            fields.next();
            for (int i = 0; i < count; i++) {
                row.append(fields.next()).append('\t');
            }
        }
    }

    /**
     * Discards the next {@code count} lines.
     *
     * @throws NoSuchElementException if fewer than {@code count} lines remain
     */
    private static void skipLines(Scanner scan, int count) {
        for (int i = 0; i < count; i++) {
            scan.nextLine();
        }
    }
}