-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathinvert.java
More file actions
95 lines (58 loc) · 2.64 KB
/
invert.java
File metadata and controls
95 lines (58 loc) · 2.64 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
import java.util.*;
import java.io.*;
/**
 * Builds a simple on-disk inverted index from a vocabulary file and the token
 * file {@code fdsee_token.txt}.
 *
 * <p>Output is written below the {@code fdsee_invindex} directory (relative to
 * the working directory): one posting file per indexed word, named after the
 * word, plus {@code fdsee_vocabulary.txt} summarising every indexed word.
 */
public class invert {

    /** Directory that receives the vocabulary file and all posting files. */
    private static final String INDEX_DIR = "fdsee_invindex";

    /** Token file scanned for postings; resolved against the working directory. */
    private static final String TOKEN_FILE = "fdsee_token.txt";

    /** Summary file listing every indexed word. */
    private static final String VOCABULARY_FILE = INDEX_DIR + "/fdsee_vocabulary.txt";

    /** Encoding used for every file read or written, so input and output agree. */
    private static final String ENCODING = "UTF-8";

    /**
     * Builds the inverted index.
     *
     * <p>Each non-blank line of {@code filename} is split on tabs; column 0 is
     * parsed as the word ID, column 1 is the word and column 3 is parsed as the
     * stemmed word ID. Only words whose ID equals their stemmed ID are indexed.
     *
     * <p>Each non-blank line of {@code fdsee_token.txt} has its first and last
     * character stripped and is then split on commas into
     * {@code docID,wordID,offset,attr}.
     *
     * <p>For every indexed word, in sorted order, the file
     * {@code fdsee_invindex/<word>} receives one {@code docID,offset,attr} line
     * per matching token (in token-file order), and
     * {@code fdsee_invindex/fdsee_vocabulary.txt} receives one
     * {@code word,wordID,nDocs,nHits} line, where {@code nDocs} is the number of
     * distinct documents containing the word and {@code nHits} the number of
     * matching tokens.
     *
     * <p>The token file is read once and the matching postings are held in
     * memory until they are written out. I/O and parse failures are not
     * propagated: the stack trace is printed and the method returns.
     *
     * @param filename path of the tab-separated vocabulary file
     */
    public static void vocab(String filename) {
        try {
            Map<String, Integer> wordIds = readVocabulary(filename);

            File indexDir = new File(INDEX_DIR);
            if (!indexDir.isDirectory() && !indexDir.mkdir()) {
                throw new IOException("Cannot create directory " + indexDir.getPath());
            }

            Map<Integer, Postings> postingsById =
                    readPostings(new HashSet<>(wordIds.values()));
            writeIndex(wordIds, postingsById);
        } catch (IOException | RuntimeException ex) {
            // The method has never thrown to its callers; keep reporting only.
            ex.printStackTrace();
        }
    }

    /** Reads the vocabulary file into a sorted word -> ID map of self-stemmed words. */
    private static Map<String, Integer> readVocabulary(String filename) throws IOException {
        // TreeMap iterates its keys in natural String order, which is the
        // order the vocabulary and posting files are produced in.
        Map<String, Integer> wordIds = new TreeMap<>();
        try (BufferedReader reader = openReader(filename)) {
            String line;
            while ((line = reader.readLine()) != null) {
                if (line.trim().isEmpty()) {
                    continue; // tolerate blank / trailing empty lines
                }
                String[] columns = line.split("\t");
                int wordId = Integer.parseInt(columns[0].trim());
                int stemmedWordId = Integer.parseInt(columns[3].trim());
                if (wordId == stemmedWordId) {
                    wordIds.put(columns[1], stemmedWordId);
                }
            }
        }
        return wordIds;
    }

    /** Scans the token file once and groups the postings of the wanted word IDs. */
    private static Map<Integer, Postings> readPostings(Set<Integer> wantedIds)
            throws IOException {
        Map<Integer, Postings> postingsById = new HashMap<>();
        if (wantedIds.isEmpty()) {
            // Nothing to index: do not require the token file to exist.
            return postingsById;
        }
        try (BufferedReader reader = openReader(TOKEN_FILE)) {
            String line;
            while ((line = reader.readLine()) != null) {
                String record = line.trim();
                if (record.isEmpty()) {
                    continue; // tolerate blank / trailing empty lines
                }
                // Drop the first and last character that enclose the record.
                String[] fields = record.substring(1, record.length() - 1).split(",");
                int wordId = Integer.parseInt(fields[1].trim());
                if (!wantedIds.contains(wordId)) {
                    continue;
                }
                int docId = Integer.parseInt(fields[0].trim());
                int offset = Integer.parseInt(fields[2].trim());
                int attr = Integer.parseInt(fields[3].trim());

                Postings postings = postingsById.get(wordId);
                if (postings == null) {
                    postings = new Postings();
                    postingsById.put(wordId, postings);
                }
                postings.lines.add(docId + "," + offset + "," + attr);
                // A set counts each document once even if the token file is
                // not grouped by document.
                postings.docIds.add(docId);
            }
        }
        return postingsById;
    }

    /** Writes one posting file per word plus the vocabulary summary file. */
    private static void writeIndex(Map<String, Integer> wordIds,
                                   Map<Integer, Postings> postingsById) throws IOException {
        try (PrintWriter vocabulary = new PrintWriter(VOCABULARY_FILE, ENCODING)) {
            for (Map.Entry<String, Integer> entry : wordIds.entrySet()) {
                String word = entry.getKey();
                Integer wordId = entry.getValue();
                Postings postings = postingsById.get(wordId);
                int nDocs = 0;
                int nHits = 0;

                // The posting file is created even when the word has no hits.
                String postingFile = INDEX_DIR + "/" + word;
                try (PrintWriter out = new PrintWriter(postingFile, ENCODING)) {
                    if (postings != null) {
                        for (String posting : postings.lines) {
                            out.println(posting);
                        }
                        nDocs = postings.docIds.size();
                        nHits = postings.lines.size();
                    }
                    // PrintWriter swallows write errors; surface them explicitly.
                    if (out.checkError()) {
                        throw new IOException("Failed writing " + postingFile);
                    }
                }
                vocabulary.println(word + "," + wordId + "," + nDocs + "," + nHits);
            }
            if (vocabulary.checkError()) {
                throw new IOException("Failed writing " + VOCABULARY_FILE);
            }
        }
    }

    /** Opens a buffered reader that decodes the file with {@link #ENCODING}. */
    private static BufferedReader openReader(String path) throws IOException {
        return new BufferedReader(new InputStreamReader(new FileInputStream(path), ENCODING));
    }

    /** Postings collected for a single word ID. */
    private static final class Postings {
        /** Formatted {@code docID,offset,attr} lines in token-file order. */
        final List<String> lines = new ArrayList<>();
        /** Distinct document IDs in which the word occurs. */
        final Set<Integer> docIds = new HashSet<>();
    }
}