@@ -25,22 +25,20 @@ def get_scholar_citations():
2525 """
2626 print (f"Fetching citations for Google Scholar ID: { SCHOLAR_USER_ID } " )
2727
28- # Initialize citation data structure
2928 citation_data = {
30- 'metadata' : {
31- 'last_updated' : datetime .now ().strftime ("%Y-%m-%d %H:%M:%S" )
32- },
33- 'papers' : {} # Initialize as empty dict, not None
29+ 'metadata' : {},
30+ 'papers' : {}
3431 }
3532
36- # Try to load existing data first to avoid unnecessary requests
33+ # Try to load existing data first to preserve the last known good values.
3734 if os .path .exists (OUTPUT_FILE ):
3835 try :
3936 with open (OUTPUT_FILE , 'r' ) as f :
4037 existing_data = yaml .safe_load (f )
4138 if existing_data and isinstance (existing_data , dict ):
42- # Keep existing metadata if available
43- if 'papers' in existing_data and existing_data ['papers' ] is not None :
39+ if isinstance (existing_data .get ('metadata' ), dict ):
40+ citation_data ['metadata' ] = existing_data ['metadata' ]
41+ if existing_data .get ('papers' ) is not None :
4442 citation_data ['papers' ] = existing_data ['papers' ]
4543 except Exception as e :
4644 print (f"Warning: Could not read existing citation data: { e } " )
@@ -59,55 +57,60 @@ def get_scholar_citations():
5957 print (f"Retrying in { wait_time :.1f} seconds..." )
6058 time .sleep (wait_time )
6159 else :
62- print ("All retries failed. Using existing data if available." )
63- return citation_data
60+ raise RuntimeError ("All Google Scholar fetch retries failed" ) from e
6461
6562 if not author_data :
66- print ("Could not fetch author data" )
67- return citation_data
68-
69- # Process publications
70- if 'publications' in author_data :
71- for pub in author_data ['publications' ]:
72- try :
73- # Get publication ID
74- pub_id = None
75- if 'pub_id' in pub and pub ['pub_id' ]:
76- pub_id = pub ['pub_id' ]
77- elif 'author_pub_id' in pub and pub ['author_pub_id' ]:
78- pub_id = pub ['author_pub_id' ]
79-
80- if not pub_id :
81- print (f"Warning: No ID found for publication: { pub .get ('bib' , {}).get ('title' , 'Unknown' )} " )
82- continue
83-
84- # Get publication metadata
85- title = "Unknown Title"
86- year = "Unknown Year"
87- citations = 0
88-
89- if 'bib' in pub :
90- if 'title' in pub ['bib' ]:
91- title = pub ['bib' ]['title' ]
92- if 'pub_year' in pub ['bib' ]:
93- year = str (pub ['bib' ]['pub_year' ])
94-
95- if 'num_citations' in pub :
96- citations = pub ['num_citations' ]
97-
98- print (f"Found: { title } ({ year } ) - Citations: { citations } " )
99-
100- # Store citation data
101- citation_data ['papers' ][pub_id ] = {
102- 'title' : title ,
103- 'year' : year ,
104- 'citations' : citations
105- }
106-
107- except Exception as e :
108- print (f"Error processing publication: { str (e )} " )
109- else :
110- print ("No publications found in author data" )
63+ raise RuntimeError ("Could not fetch author data" )
64+
65+ publications = author_data .get ('publications' )
66+ if not publications :
67+ raise RuntimeError ("No publications found in author data" )
68+
69+ fetched_papers = 0
70+ for pub in publications :
71+ try :
72+ # Get publication ID
73+ pub_id = None
74+ if 'pub_id' in pub and pub ['pub_id' ]:
75+ pub_id = pub ['pub_id' ]
76+ elif 'author_pub_id' in pub and pub ['author_pub_id' ]:
77+ pub_id = pub ['author_pub_id' ]
78+
79+ if not pub_id :
80+ print (f"Warning: No ID found for publication: { pub .get ('bib' , {}).get ('title' , 'Unknown' )} " )
81+ continue
82+
83+ # Get publication metadata
84+ title = "Unknown Title"
85+ year = "Unknown Year"
86+ citations = 0
87+
88+ if 'bib' in pub :
89+ if 'title' in pub ['bib' ]:
90+ title = pub ['bib' ]['title' ]
91+ if 'pub_year' in pub ['bib' ]:
92+ year = str (pub ['bib' ]['pub_year' ])
93+
94+ if 'num_citations' in pub :
95+ citations = pub ['num_citations' ]
96+
97+ print (f"Found: { title } ({ year } ) - Citations: { citations } " )
98+
99+ # Store citation data
100+ citation_data ['papers' ][pub_id ] = {
101+ 'title' : title ,
102+ 'year' : year ,
103+ 'citations' : citations
104+ }
105+ fetched_papers += 1
106+
107+ except Exception as e :
108+ print (f"Error processing publication: { str (e )} " )
109+
110+ if fetched_papers == 0 :
111+ raise RuntimeError ("Google Scholar fetch completed but no publications could be processed" )
112+
113+ citation_data ['metadata' ]['last_updated' ] = datetime .now ().strftime ("%Y-%m-%d %H:%M:%S" )
111114
112115 # Save to YAML file
113116 try :
@@ -116,6 +119,7 @@ def get_scholar_citations():
116119 print (f"Citation data saved to { OUTPUT_FILE } " )
117120 except Exception as e :
118121 print (f"Error saving citation data: { str (e )} " )
122+ raise
119123
120124 return citation_data
121125
0 commit comments