-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathversion1.py
More file actions
118 lines (90 loc) · 3.93 KB
/
version1.py
File metadata and controls
118 lines (90 loc) · 3.93 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
import string
from PyPDF2 import PdfReader
import os
# Resolve the input files relative to this script's own directory so the tool
# behaves the same regardless of the current working directory.
current_dir = os.path.dirname(os.path.abspath(__file__))
resume_file_path = os.path.join(current_dir, "resume.pdf")
job_description_file_path = os.path.join(current_dir, "job_description.txt")
class ATSScanner:
    """Score a resume against a job description by keyword overlap.

    Both texts are reduced to sets of lowercase keywords (punctuation and
    stop words removed). The score is the percentage of unique
    job-description keywords that also appear in the resume.
    """

    # Every ASCII punctuation mark except the apostrophe acts as a word
    # separator and is turned into a space, so "Python/SQL" or "AWS,Docker"
    # yield separate keywords instead of one fused token. Apostrophes are
    # deleted so contractions and possessives stay a single word
    # ("bachelor's" -> "bachelors").
    _SEPARATORS = string.punctuation.replace("'", "")
    _PUNCTUATION_TABLE = str.maketrans(_SEPARATORS, " " * len(_SEPARATORS), "'")

    def __init__(self):
        # A manual list of "stop words" (words to ignore).
        # In a real app, you'd use a library such as NLTK or spaCy for this.
        self.STOP_WORDS = {
            "and", "the", "is", "in", "at", "of", "or", "a", "an", "to", "for",
            "with", "on", "as", "by", "we", "are", "you", "your", "it", "be",
            "that", "which", "from", "this", "will", "can", "have", "has",
            "but", "not", "if", "job", "description", "resume", "work",
            "experience",
        }

    def clean_text(self, text: str) -> set:
        """Return the set of unique keywords found in *text*.

        Steps:
            1. Lowercase everything.
            2. Strip ASCII punctuation (separators become spaces, apostrophes
               are dropped).
            3. Split on whitespace.
            4. Discard stop words.

        Args:
            text: Raw text of a resume or job description.

        Returns:
            A set of lowercase keyword strings; empty if nothing remains.
        """
        normalized = text.lower().translate(self._PUNCTUATION_TABLE)
        # A set removes duplicates and gives cheap membership tests and the
        # set algebra used by scan().
        return {
            word for word in normalized.split() if word not in self.STOP_WORDS
        }

    def extract_text_from_resume(self, input_file):
        """Extract the text of every page of a PDF.

        Args:
            input_file: Path or file object accepted by ``PdfReader``.

        Returns:
            The concatenated page text. On any failure an
            ``"Error:<exception> has occurred"`` string is returned instead
            of raising, so callers receive a string either way.
        """
        try:
            reader = PdfReader(input_file)
            # `or ""` guards against a page that yields no text at all.
            return "".join(page.extract_text() or "" for page in reader.pages)
        except Exception as e:
            return f"Error:{e} has occurred"

    def scan(self, resume_text: str, job_desc_text: str) -> tuple:
        """Compare a resume with a job description.

        Args:
            resume_text: Plain text of the resume.
            job_desc_text: Plain text of the job description.

        Returns:
            A ``(score, matches, missing)`` tuple where ``score`` is the
            percentage (0-100) of unique job-description keywords found in
            the resume, ``matches`` is the set of keywords present in both
            texts, and ``missing`` is the set present in the job description
            only. If the job description contains no keywords the result is
            ``(0.0, set(), set())``.
        """
        jd_set = self.clean_text(job_desc_text)
        # Nothing to match against: bail out before dividing by zero.
        if not jd_set:
            return 0.0, set(), set()

        resume_set = self.clean_text(resume_text)
        # Keywords present in BOTH texts.
        matches = resume_set & jd_set
        # Keywords the job description asks for that the resume lacks.
        missing = jd_set - resume_set
        # (matches / total unique JD keywords) * 100
        score = (len(matches) / len(jd_set)) * 100
        return score, matches, missing
# --- RUNNER CODE ---
if __name__ == "__main__":
    # Command-line entry point: load the resume and job description that sit
    # next to this script, score them, and print a report to the terminal.
    scanner = ATSScanner()
    print("--- ATS RESUME HACKER v1.0 ---")

    # Load the resume. PDFs go through the PDF text extractor; anything else
    # is read as UTF-8 text (reading it as bytes would make the scanner's
    # str-based text cleaning fail with a TypeError).
    # NOTE: extract_text_from_resume reports failures by returning an
    # "Error:..." string, which is then scored like ordinary resume text.
    if resume_file_path.lower().endswith(".pdf"):
        resume_input = scanner.extract_text_from_resume(input_file=resume_file_path)
    else:
        with open(resume_file_path, "r", encoding="utf-8") as filp:
            resume_input = filp.read()

    with open(job_description_file_path, "r", encoding="utf-8") as filp:
        jd_input = filp.read()

    # Run the logic
    score, matches, missing = scanner.scan(resume_input, jd_input)

    # Output the report
    print("\n" + "="*30)
    print(f"MATCH SCORE: {score:.1f}%")
    print("="*30)

    print(f"\n✅ MATCHED KEYWORDS ({len(matches)}):")
    print(", ".join(sorted(matches)))

    print(f"\n⚠️ MISSING CRITICAL KEYWORDS ({len(missing)}):")
    # Missing keywords are highlighted in red using ANSI escape codes.
    RED = "\033[91m"
    RESET = "\033[0m"
    for word in sorted(missing):
        print(f"{RED}- {word}{RESET}")

    print("\n" + "="*30)
    # Verdict thresholds: below 50% rejected, below 80% maybe, otherwise likely.
    if score < 50:
        print("Verdict: 🗑️ AUTO-REJECTED. You need to add the missing keywords.")
    elif score < 80:
        print("Verdict: ⚠️ MAYBE. Human might read it if lucky.")
    else:
        print("Verdict: 🚀 INTERVIEW LIKELY. Great job.")