-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmain.py
More file actions
77 lines (61 loc) · 2.67 KB
/
main.py
File metadata and controls
77 lines (61 loc) · 2.67 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
"""Read creator social-profile URLs from a Google Sheet, validate them,
flag unsafe ones via Safe Browsing, emit an SQL file for the valid set,
and archive the sheet with a timestamped duplicate.
"""
from googleapiclient.discovery import build
from auth import authenticate_google_services
from check import safe_browsing_check, validate_url_format
from dup import duplicate_sheet_with_timestamp
from read import get_sheet_id_by_name, read_google_sheet
from script import generate_sql_file

# Scope grants full read/write access to spreadsheets (needed both to
# read the data and to duplicate the sheet at the end).
SCOPES = ["https://www.googleapis.com/auth/spreadsheets"]

# Authenticate and create the Sheets API service client.
credentials = authenticate_google_services(SCOPES)
service = build("sheets", "v4", credentials=credentials)

# Source spreadsheet and the tab/range holding the profile links.
spreadsheet_id = "1mpREMogZv4ojxtJUHwwAxaoVvzq08ymU7yn7WXKIACM"  # Replace with your actual spreadsheet ID
range_name = "Social Profile Links!A1:Z100"
data = read_google_sheet(service, spreadsheet_id, range_name)

bad_urls = []     # (url, creator, column_name): failed format check or flagged unsafe
unsafe_urls = []  # URLs flagged by the Safe Browsing check
valid_urls = []   # (url, creator, column_name): passed all checks

if data:
    header = data[0]
    processed_urls = []  # (url, creator, is_valid, column_name) for format-valid URLs

    for row in data[1:]:
        # Skip rows with no URL in the first URL column.
        if len(row) < 2 or not row[1].strip():
            continue
        # The creator's name is assumed to be in the first column.
        creator = row[0].lower()
        for idx, cell in enumerate(row[1:]):
            # Stop at the first empty or numeric cell: columns past this
            # point are assumed not to contain URLs for this row.
            if cell == "" or cell.isnumeric():
                break
            url = cell.strip()
            is_valid = validate_url_format(url)
            # Normalize the header label to a compact lowercase key.
            column_name = header[idx + 1].replace(" ", "").lower()
            if is_valid:
                processed_urls.append((url, creator, is_valid, column_name))
            else:
                bad_urls.append((url, creator, column_name))

    # Batch safety check over all format-valid URLs.
    all_urls = [url for url, _, _, _ in processed_urls]
    unsafe_urls = safe_browsing_check(all_urls)

    # Final categorization. NOTE: every entry in processed_urls already has
    # is_valid == True, so in practice only the safety check can route a
    # URL into bad_urls here; the is_valid test is kept as a safeguard.
    for url, creator, is_valid, column_name in processed_urls:
        if url in unsafe_urls or not is_valid:
            bad_urls.append((url, creator, column_name))
        else:
            valid_urls.append((url, creator, column_name))

    # Report the failures.
    if bad_urls:
        print("Bad or unsafe URLs with creators:")
        # BUG FIX: bad_urls holds 3-tuples (url, creator, column_name);
        # the original 2-name unpacking raised ValueError on the first item.
        for url, creator, _column in bad_urls:
            print(f"URL: {url}, Creator: {creator}")

    generate_sql_file(valid_urls, env="prd")

    # Optionally, duplicate the sheet for archival with a timestamped name.
    sheet_id = get_sheet_id_by_name(service, spreadsheet_id, "Social Profile Links")
    if sheet_id is not None:
        duplicate_sheet_with_timestamp(service, spreadsheet_id, sheet_id, "Archived ")
    else:
        print("Sheet not found.")
else:
    print("No data returned from the spreadsheet.")