-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtemp.py
More file actions
80 lines (61 loc) · 2.72 KB
/
temp.py
File metadata and controls
80 lines (61 loc) · 2.72 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
import csv
import re
# Default locations used by the script: the plain-text list of addresses to
# read, and the CSV report that is generated from it.
INPUT_FILENAME = "emails.txt"
OUTPUT_FILENAME = "processed_emails.csv"
def process_email_line(email):
    """Derive a display name from the local part of an email address.

    The text before the first ``@`` is taken, every run of four or more
    digits (typically a year such as ``2023``) is removed, and what is left
    is split on ``.``.  Every non-empty piece is title-cased and the pieces
    are joined with single spaces, so middle names are kept and stray dots
    (``john..doe``, or the dot left behind by ``2023.john.doe``) do not
    swallow the rest of the name.

    Args:
        email: The address to parse, e.g. ``"john.doe2023@example.com"``.
            A string without ``@`` is treated as being all local part.

    Returns:
        A ``(full_name, email)`` tuple.  ``email`` is passed through
        unchanged; ``full_name`` is ``""`` when nothing name-like remains.

    Examples:
        >>> process_email_line("john.doe2023@example.com")
        ('John Doe', 'john.doe2023@example.com')
    """
    # Everything before the first '@' is the candidate name.
    local_part = email.partition('@')[0]

    # Shorter digit runs (e.g. "99") are intentionally left in place; only
    # year-like runs of four or more digits are removed.
    without_year = re.sub(r'\d{4,}', '', local_part)

    # Dropping empty pieces keeps leading, trailing or doubled dots from
    # shifting the real name components out of position.
    pieces = (piece.strip() for piece in without_year.split('.'))
    full_name = ' '.join(piece.title() for piece in pieces if piece)

    return full_name, email
def process_emails_file(input_path, output_path):
    """Build a de-duplicated, sorted name/email CSV from a list of addresses.

    The input is read as text with one address per line; surrounding
    whitespace is stripped and blank lines are skipped.  The distinct
    addresses are sorted with Python's default (case-sensitive) string
    ordering, and one ``Name,Email`` row per address is written after a
    header row, using ``process_email_line`` to derive the name.

    Failures are reported on standard output instead of being raised, and
    the function returns ``None`` in every case.  When reading fails the
    output file is not opened at all.

    Args:
        input_path: Path of the text file holding the addresses.
        output_path: Path of the CSV file to create (overwritten if present).
    """
    try:
        # 'utf-8-sig' decodes plain UTF-8 exactly like 'utf-8' but also drops
        # a leading byte-order mark, which would otherwise stick to the first
        # address and produce a bogus entry.
        with open(input_path, 'r', encoding='utf-8-sig') as infile:
            unique_emails = {
                stripped for stripped in map(str.strip, infile) if stripped
            }
    except FileNotFoundError:
        print(f"❌ Error: The input file '{input_path}' was not found. Please make sure it exists.")
        return
    except Exception as e:
        # Deliberately broad: this is the script's reporting boundary.
        print(f"❌ An unexpected error occurred during reading: {e}")
        return

    # sorted() accepts any iterable, so no intermediate list is needed.
    sorted_emails = sorted(unique_emails)

    try:
        # newline='' lets the csv module control line endings itself.
        with open(output_path, 'w', newline='', encoding='utf-8') as outfile:
            csv_writer = csv.writer(outfile)
            csv_writer.writerow(['Name', 'Email'])
            csv_writer.writerows(
                process_email_line(email) for email in sorted_emails
            )
        print(f"✅ Success! {len(sorted_emails)} unique emails were processed and saved to '{output_path}'.")
    except Exception as e:
        # Deliberately broad: report any failure while producing the CSV.
        print(f"❌ An unexpected error occurred during writing: {e}")
# Script entry: convert the default input list into the default CSV report.
process_emails_file(input_path=INPUT_FILENAME, output_path=OUTPUT_FILENAME)