-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathentity_extractor.py
More file actions
40 lines (34 loc) · 1.87 KB
/
entity_extractor.py
File metadata and controls
40 lines (34 loc) · 1.87 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
import re
class EntityExtractor:
    """Rule-based extractor for marks, comparison condition and result limit.

    ``extract`` scans a free-text query with regular expressions and returns
    a dict with the keys ``"marks"``, ``"condition"`` and ``"limit"``; a key
    is ``None`` when nothing was detected for it.
    """

    # Tried in order; the first pattern that matches supplies the limit.
    _LIMIT_PATTERNS = (
        re.compile(r"\b(?:top|first|highest|show|display)\s+(\d+)\b"),
        re.compile(r"\blimit\s+(\d+)\b"),
        re.compile(r"\b(\d+)\s+(?:students|performers|results|records)\b"),
    )

    # (pattern, operator) pairs, tried in order. Symbolic operators are kept
    # outside the \b...\b group: \b only matches next to a word character, so
    # a symbol surrounded by spaces ("marks >= 50") would never match inside
    # it. Two-character symbols are listed before ">", "<" and "=" so the
    # longer operator wins.
    _CONDITION_PATTERNS = (
        (re.compile(r"\b(?:at least|minimum|greater than or equal to)\b|>="), ">="),
        (re.compile(r"\b(?:at most|maximum|less than or equal to)\b|<="), "<="),
        (re.compile(r"\b(?:above|greater than|more than)\b|>"), ">"),
        (re.compile(r"\b(?:below|less than|under)\b|<"), "<"),
        (re.compile(r"\b(?:equal to|equals|exactly)\b|="), "="),
    )

    # Stand-alone numbers of one to three digits are candidates for marks.
    _NUMBER_PATTERN = re.compile(r"\b(\d{1,3})\b")

    # Substrings that hint at a comparison even when no condition matched.
    _COMPARISON_HINTS = (
        "above", "below", "greater", "less",
        "minimum", "maximum", "equal", "exactly",
    )

    # Substrings that indicate the query is talking about marks.
    _MARKS_WORDS = ("marks", "score", "grade")

    def extract(self, text):
        """Extract marks, condition and limit entities from ``text``.

        Args:
            text: The query string. ``None`` or an empty string yields a
                result with every entry set to ``None``.

        Returns:
            A dict with keys ``"marks"`` (int or None), ``"condition"``
            (one of ``">="``, ``"<="``, ``">"``, ``"<"``, ``"="`` or None)
            and ``"limit"`` (int or None).
        """
        entities = {"marks": None, "condition": None, "limit": None}
        text_lower = (text or "").lower()

        # Detect a numerical limit for top performers or explicit limits.
        limit_match = None
        for pattern in self._LIMIT_PATTERNS:
            limit_match = pattern.search(text_lower)
            if limit_match:
                break
        if limit_match:
            entities["limit"] = int(limit_match.group(1))

        # First matching comparison phrase or symbol decides the condition.
        for pattern, operator in self._CONDITION_PATTERNS:
            if pattern.search(text_lower):
                entities["condition"] = operator
                break

        # The digits already consumed as the limit must not be reused as the
        # marks value ("top 5 students above 80" -> marks 80, not 5).
        limit_span = limit_match.span(1) if limit_match else None
        mark_candidates = [
            int(found.group(1))
            for found in self._NUMBER_PATTERN.finditer(text_lower)
            if found.span(1) != limit_span
        ]
        if not mark_candidates:
            return entities

        if entities["condition"] is not None:
            entities["marks"] = mark_candidates[0]
            return entities

        # A limit with no comparison wording is a pure "top N" style query:
        # leave marks and condition unset.
        limit_only = entities["limit"] is not None and not any(
            hint in text_lower for hint in self._COMPARISON_HINTS
        )
        if not limit_only and any(word in text_lower for word in self._MARKS_WORDS):
            # A bare number next to a marks word is read as an exact match.
            entities["marks"] = mark_candidates[0]
            entities["condition"] = "="
        return entities