-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcomplexity.py
More file actions
138 lines (111 loc) · 4.84 KB
/
complexity.py
File metadata and controls
138 lines (111 loc) · 4.84 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
# This file includes code adapted from https://github.com/HyperKuvid-Labs/FrugalSOT/blob/main/src/prompt.py
# licensed under the Apache License 2.0.
# Modifications: Refactored into a class structure and added detailed complexity analysis.
import nltk
from nltk import word_tokenize, pos_tag, ne_chunk, sent_tokenize
class ComplexityLevels:
    """String labels for the three complexity tiers used by the classifier."""

    # Highest tier.
    high: str = "high"
    # Middle tier; note the attribute is ``mid`` but the label text is "med".
    mid: str = "med"
    # Lowest tier.
    low: str = "low"
class ClassifyPrompt:
    """Heuristically rate a prompt's complexity from three NLTK-based signals.

    The prompt is tokenized, POS-tagged and NE-chunked once in ``__init__``.
    Three independent ratings (length, semantic, syntactic) are then derived
    from those results and combined by ``final_complexity`` into a single
    ``ComplexityLevels`` label.
    """

    # Resource paths, in the form accepted by ``nltk.data.find``, that are
    # checked (and downloaded when missing) before any NLTK call is made.
    _REQUIRED_RESOURCES = (
        'tokenizers/punkt',
        'taggers/averaged_perceptron_tagger',
        'chunkers/maxent_ne_chunker',
        'corpora/words',
        'tokenizers/punkt_tab',
        'taggers/averaged_perceptron_tagger_eng',
        'chunkers/maxent_ne_chunker_tab',
    )

    # Resource paths already located during this process. Keeping them here
    # lets later instantiations skip the lookup for resources known to exist.
    _verified_resources = set()

    def __init__(self, prompt: str):
        """Prepare the NLTK analyses of ``prompt`` used by the rating methods.

        Args:
            prompt: The text to classify.
        """
        self._ensure_nltk_resources()
        self.prompt = prompt
        self.tokens = word_tokenize(prompt)
        self.pos_tags = pos_tag(self.tokens)
        self.ner_tree = ne_chunk(self.pos_tags)

    @classmethod
    def _ensure_nltk_resources(cls) -> None:
        """Locate each required NLTK resource, downloading it when missing.

        A resource is only remembered as verified once ``nltk.data.find``
        succeeds for it, so a resource that could not be found or downloaded
        is looked up (and its download retried) again on the next call.
        """
        for resource in cls._REQUIRED_RESOURCES:
            if resource in cls._verified_resources:
                continue
            try:
                nltk.data.find(resource)
            except LookupError:
                # The downloader takes the package id, i.e. the last path part.
                nltk.download(resource.split('/')[-1])
                try:
                    nltk.data.find(resource)
                except LookupError:
                    # Still unavailable: carry on silently and leave it
                    # unverified so a later instantiation tries again.
                    continue
            cls._verified_resources.add(resource)

    def __call__(self) -> str:
        """Return the overall complexity label; same as ``final_complexity``."""
        return self.final_complexity()

    def length_complexity(self) -> str:
        """Rate the prompt by its whitespace-separated word count.

        Returns:
            ``ComplexityLevels.low`` for up to 7 words, ``mid`` for 8 to 15
            words, ``high`` for more.
        """
        word_count = len(self.prompt.split())
        if word_count <= 7:
            return ComplexityLevels.low
        if word_count <= 15:
            return ComplexityLevels.mid
        return ComplexityLevels.high

    def semantic_complexity(self) -> str:
        """Rate the prompt by how many chunks the NE chunker labelled.

        Only direct children of the chunk tree that expose a ``label``
        attribute are counted; plain (token, tag) pairs are skipped.

        Returns:
            ``ComplexityLevels.low`` for 0 or 1 labelled chunks, ``mid`` for
            exactly 2, ``high`` for 3 or more.
        """
        entity_count = sum(1 for node in self.ner_tree if hasattr(node, 'label'))
        if entity_count <= 1:
            return ComplexityLevels.low
        if entity_count == 2:
            return ComplexityLevels.mid
        return ComplexityLevels.high

    def syntactic_complexity(self) -> str:
        """Rate the prompt by connective tags and average sentence length.

        The score is the number of tokens tagged ``CC``, ``IN`` or ``TO``,
        plus one when the average number of tokens per sentence exceeds 12.

        Returns:
            ``ComplexityLevels.low`` for a score of 0 or 1, ``mid`` for
            exactly 2, ``high`` for 3 or more.
        """
        # CC marks coordinating conjunctions. IN and TO are used as a rough
        # proxy for subordination; IN also tags ordinary prepositions, so
        # this over-counts relative to true subordinate clauses.
        connective_tags = {'CC', 'IN', 'TO'}
        connective_count = sum(1 for _, tag in self.pos_tags if tag in connective_tags)

        sentence_count = len(sent_tokenize(self.prompt))
        # Guard against division by zero when no sentence is detected.
        avg_sentence_length = (
            len(self.tokens) / sentence_count if sentence_count > 0 else 0
        )
        long_sentence_bonus = 1 if avg_sentence_length > 12 else 0

        complexity_score = connective_count + long_sentence_bonus
        if complexity_score <= 1:
            return ComplexityLevels.low
        if complexity_score == 2:
            return ComplexityLevels.mid
        return ComplexityLevels.high

    def final_complexity(self) -> str:
        """Combine the three ratings into one overall complexity label.

        Each rating is converted to points (low 0, mid 2, anything else 4)
        and weighted: length x1, semantic x2, syntactic x3. The three
        individual ratings are printed to stdout before the result is
        returned.

        Returns:
            ``ComplexityLevels.low`` for a weighted total of 3 or less,
            ``mid`` for a total up to 8, ``high`` above that.
        """
        length_comp = self.length_complexity()
        semantic_comp = self.semantic_complexity()
        syntactic_comp = self.syntactic_complexity()

        # Built here rather than at class level so the class definition does
        # not need ComplexityLevels to exist at import time.
        level_points = {ComplexityLevels.low: 0, ComplexityLevels.mid: 2}
        length_score = level_points.get(length_comp, 4)
        semantic_score = level_points.get(semantic_comp, 4)
        syntactic_score = level_points.get(syntactic_comp, 4)

        print(f"Length: {length_comp}, Semantic: {semantic_comp}, Syntactic: {syntactic_comp}")

        total_score = length_score * 1 + semantic_score * 2 + syntactic_score * 3
        if total_score <= 3:
            return ComplexityLevels.low
        if total_score <= 8:
            return ComplexityLevels.mid
        return ComplexityLevels.high

    @staticmethod
    def get_complexity(prompt: str) -> str:
        """Classify ``prompt`` in one call and return its overall label.

        Args:
            prompt: The text to classify.

        Returns:
            The label produced by ``final_complexity`` for this prompt.
        """
        return ClassifyPrompt(prompt).final_complexity()
# Sample prompts keyed by a tier label. The demo below classifies every
# prompt but does not compare the result against its key.
example_prompts = {
    "low": [
        "What is AI?",
        "Define gravity.",
        "Who is the president?",
        "List three colors.",
        "What is Python?",
    ],
    "mid": [
        "Explain the process of photosynthesis.",
        "How does a car engine work?",
        "Describe the water cycle in brief.",
        "What are the benefits of exercise?",
        "Summarize the plot of Romeo and Juliet.",
    ],
    "high": [
        "Analyze the impact of climate change on global agriculture and suggest mitigation strategies.",
        "Compare and contrast machine learning and deep learning with examples.",
        "Discuss the ethical implications of artificial intelligence in healthcare.",
        "Evaluate the effectiveness of renewable energy sources in reducing carbon emissions.",
        "Explain the process of DNA replication and its significance in genetic inheritance.",
    ],
}

if __name__ == "__main__":
    # Demo run: classify each sample prompt in dictionary order and print
    # the prompt followed by the label the classifier assigned to it.
    for prompt_group in example_prompts.values():
        for sample in prompt_group:
            verdict = ClassifyPrompt.get_complexity(sample)
            print(f" Prompt: {sample}\nPrompt Complexity Level: {verdict}\n\n")