Skip to content

Commit 2256cbc

Browse files
Update citation timestamp only after successful fetch
1 parent 776f3e2 commit 2256cbc

1 file changed

Lines changed: 59 additions & 55 deletions

File tree

bin/update_scholar_citations.py

Lines changed: 59 additions & 55 deletions
Original file line number | Diff line number | Diff line change
@@ -25,22 +25,20 @@ def get_scholar_citations():
2525
"""
2626
print(f"Fetching citations for Google Scholar ID: {SCHOLAR_USER_ID}")
2727

28-
# Initialize citation data structure
2928
citation_data = {
30-
'metadata': {
31-
'last_updated': datetime.now().strftime("%Y-%m-%d %H:%M:%S")
32-
},
33-
'papers': {} # Initialize as empty dict, not None
29+
'metadata': {},
30+
'papers': {}
3431
}
3532

36-
# Try to load existing data first to avoid unnecessary requests
33+
# Try to load existing data first to preserve the last known good values.
3734
if os.path.exists(OUTPUT_FILE):
3835
try:
3936
with open(OUTPUT_FILE, 'r') as f:
4037
existing_data = yaml.safe_load(f)
4138
if existing_data and isinstance(existing_data, dict):
42-
# Keep existing metadata if available
43-
if 'papers' in existing_data and existing_data['papers'] is not None:
39+
if isinstance(existing_data.get('metadata'), dict):
40+
citation_data['metadata'] = existing_data['metadata']
41+
if existing_data.get('papers') is not None:
4442
citation_data['papers'] = existing_data['papers']
4543
except Exception as e:
4644
print(f"Warning: Could not read existing citation data: {e}")
@@ -59,55 +57,60 @@ def get_scholar_citations():
5957
print(f"Retrying in {wait_time:.1f} seconds...")
6058
time.sleep(wait_time)
6159
else:
62-
print("All retries failed. Using existing data if available.")
63-
return citation_data
60+
raise RuntimeError("All Google Scholar fetch retries failed") from e
6461

6562
if not author_data:
66-
print("Could not fetch author data")
67-
return citation_data
68-
69-
# Process publications
70-
if 'publications' in author_data:
71-
for pub in author_data['publications']:
72-
try:
73-
# Get publication ID
74-
pub_id = None
75-
if 'pub_id' in pub and pub['pub_id']:
76-
pub_id = pub['pub_id']
77-
elif 'author_pub_id' in pub and pub['author_pub_id']:
78-
pub_id = pub['author_pub_id']
79-
80-
if not pub_id:
81-
print(f"Warning: No ID found for publication: {pub.get('bib', {}).get('title', 'Unknown')}")
82-
continue
83-
84-
# Get publication metadata
85-
title = "Unknown Title"
86-
year = "Unknown Year"
87-
citations = 0
88-
89-
if 'bib' in pub:
90-
if 'title' in pub['bib']:
91-
title = pub['bib']['title']
92-
if 'pub_year' in pub['bib']:
93-
year = str(pub['bib']['pub_year'])
94-
95-
if 'num_citations' in pub:
96-
citations = pub['num_citations']
97-
98-
print(f"Found: {title} ({year}) - Citations: {citations}")
99-
100-
# Store citation data
101-
citation_data['papers'][pub_id] = {
102-
'title': title,
103-
'year': year,
104-
'citations': citations
105-
}
106-
107-
except Exception as e:
108-
print(f"Error processing publication: {str(e)}")
109-
else:
110-
print("No publications found in author data")
63+
raise RuntimeError("Could not fetch author data")
64+
65+
publications = author_data.get('publications')
66+
if not publications:
67+
raise RuntimeError("No publications found in author data")
68+
69+
fetched_papers = 0
70+
for pub in publications:
71+
try:
72+
# Get publication ID
73+
pub_id = None
74+
if 'pub_id' in pub and pub['pub_id']:
75+
pub_id = pub['pub_id']
76+
elif 'author_pub_id' in pub and pub['author_pub_id']:
77+
pub_id = pub['author_pub_id']
78+
79+
if not pub_id:
80+
print(f"Warning: No ID found for publication: {pub.get('bib', {}).get('title', 'Unknown')}")
81+
continue
82+
83+
# Get publication metadata
84+
title = "Unknown Title"
85+
year = "Unknown Year"
86+
citations = 0
87+
88+
if 'bib' in pub:
89+
if 'title' in pub['bib']:
90+
title = pub['bib']['title']
91+
if 'pub_year' in pub['bib']:
92+
year = str(pub['bib']['pub_year'])
93+
94+
if 'num_citations' in pub:
95+
citations = pub['num_citations']
96+
97+
print(f"Found: {title} ({year}) - Citations: {citations}")
98+
99+
# Store citation data
100+
citation_data['papers'][pub_id] = {
101+
'title': title,
102+
'year': year,
103+
'citations': citations
104+
}
105+
fetched_papers += 1
106+
107+
except Exception as e:
108+
print(f"Error processing publication: {str(e)}")
109+
110+
if fetched_papers == 0:
111+
raise RuntimeError("Google Scholar fetch completed but no publications could be processed")
112+
113+
citation_data['metadata']['last_updated'] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
111114

112115
# Save to YAML file
113116
try:
@@ -116,6 +119,7 @@ def get_scholar_citations():
116119
print(f"Citation data saved to {OUTPUT_FILE}")
117120
except Exception as e:
118121
print(f"Error saving citation data: {str(e)}")
122+
raise
119123

120124
return citation_data
121125

0 commit comments

Comments
 (0)