-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathDocument.java
More file actions
73 lines (61 loc) · 2.17 KB
/
Document.java
File metadata and controls
73 lines (61 loc) · 2.17 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
import java.util.*;
import java.util.stream.Stream;
/**
 * Holds the per-document counters (term frequencies, entity frequencies and a
 * length value) and renders them as a single space-separated summary line via
 * {@link #toString()}.
 *
 * <p>The fields are package-private and are read and written directly; this class
 * only initialises them and derives summary values from them.
 */
public class Document {
    /** Maximum number of entities listed by {@link #get5Entities()}. */
    private static final int TOP_ENTITY_COUNT = 5;

    /** Identifier of the document, as passed to the constructor. */
    String docID;
    /** key - term, value - counter */
    HashMap<String, Integer> terms;
    /** key - entity, value - counter */
    HashMap<String, Integer> entities;
    /** Length value printed by {@link #toString()}; starts at 1 and is never changed by this class. */
    int length;

    /**
     * Creates a document with empty term and entity maps and a length of 1.
     *
     * @param docID identifier of the document
     */
    public Document(String docID) {
        terms = new HashMap<>();
        entities = new HashMap<>();
        this.docID = docID;
        length = 1;
    }

    /**
     * Returns the largest counter stored in {@link #terms}.
     *
     * @return the highest term counter, or 0 when there are no terms
     */
    public int getMaxTF() {
        int maxTF = 0;
        // Only the counters matter here, so iterate the values directly
        // instead of looking each key up again.
        for (int termTF : terms.values()) {
            if (termTF > maxTF) {
                maxTF = termTF;
            }
        }
        return maxTF;
    }

    /**
     * Builds a listing of the (at most) five entities with the highest counters.
     *
     * <p>Each listed entity contributes {@code ",<rate> <entity>"}, where
     * {@code rate} is the entity's counter divided by the highest entity counter
     * (so the first entity always has rate {@code 1.0}). Entities are ordered by
     * descending counter; entities with equal counters are ordered by name so the
     * result does not depend on the iteration order of the underlying hash map.
     *
     * @return the concatenated listing (note the leading comma), or an empty
     *         string when there are no entities
     */
    public String get5Entities() {
        List<Map.Entry<String, Integer>> ranked = new ArrayList<>(entities.entrySet());
        ranked.sort(Document::compareByCountThenName);

        StringBuilder listing = new StringBuilder();
        int shown = Math.min(TOP_ENTITY_COUNT, ranked.size());
        // The list is sorted by descending counter, so the first entry carries the maximum.
        double maxTF = shown > 0 ? ranked.get(0).getValue() : 0;
        for (int i = 0; i < shown; i++) {
            Map.Entry<String, Integer> ent = ranked.get(i);
            double rate = ent.getValue() / maxTF;
            listing.append(',').append(rate).append(' ').append(ent.getKey());
        }
        return listing.toString();
    }

    /** Orders entries by descending counter, breaking ties by ascending key. */
    private static int compareByCountThenName(Map.Entry<String, Integer> left,
                                              Map.Entry<String, Integer> right) {
        int byCount = Integer.compare(right.getValue(), left.getValue());
        if (byCount != 0) {
            return byCount;
        }
        return left.getKey().compareTo(right.getKey());
    }

    /**
     * Renders the summary line: document ID, maximum term counter, number of
     * distinct terms, length and the top-entity listing, separated by single spaces.
     *
     * @return the space-separated summary of this document
     */
    @Override
    public String toString() {
        StringBuilder sb = new StringBuilder();
        sb.append(docID);                          // doc ID
        sb.append(' ').append(getMaxTF());         // MaxTF
        sb.append(' ').append(terms.size());       // number of unique words
        sb.append(' ').append(length);             // length
        sb.append(' ').append(get5Entities());     // 5 Entities
        return sb.toString();
    }
}