@@ -635,92 +635,75 @@ def video_scrape(
635635 headers = {"User-Agent" : "Mozilla/5.0" }
636636
637637 try :
638- rspn = requests .get (url , headers = headers , timeout = 10 )
639- rspn .raise_for_status ()
638+ resp = requests .get (url , headers = headers , timeout = 10 )
639+ resp .raise_for_status ()
640640 except Exception :
641641 return result
642642
643- soup = BeautifulSoup (rspn .text , "html.parser" )
644- tag = soup .find ("script" , {"id" : "serialized-server-data" })
643+ soup = BeautifulSoup (resp .text , "html.parser" )
644+
645+ # Load JSON blob
646+ tag = soup .find ("script" , id = "serialized-server-data" )
645647 if not tag :
646648 return result
647649
648650 try :
649651 data = json .loads (tag .text )
650652 sections = data [0 ]["data" ]["sections" ]
651- except ( KeyError , IndexError , json . JSONDecodeError ) :
653+ except Exception :
652654 return result
653655
654- music_video_header = None
655- more = None
656- similar = None
656+ # Extract relevant sections
657+ music_video_header = next ((s for s in sections if "music-video-header" in s .get ("id" , "" )), None )
658+ more_sec = next ((s for s in sections if "more-by-artist" in s .get ("id" , "" )), None )
659+ similar_sec = next ((s for s in sections if "more-in-genre" in s .get ("id" , "" )), None )
657660
658- for sec in sections :
659- sec_id = sec .get ("id" , "" )
660- if "music-video-header" in sec_id :
661- music_video_header = sec
662- elif "more-by-artist" in sec_id :
663- more = sec
664- elif "more-in-genre" in sec_id :
665- similar = sec
666-
667- # TITLE
661+ # Parse main item
668662 item = (music_video_header or {}).get ("items" , [{}])[0 ]
669663 result ["title" ] = item .get ("title" , "" )
670664
671- # IMAGE
672- try :
673- artwork = item . get ( "artwork" , {}). get ( "dictionary" , {})
665+ # Artwork
666+ artwork = item . get ( "artwork" , {}). get ( "dictionary" , {})
667+ if artwork :
674668 result ["image" ] = get_cover (
675669 artwork .get ("url" , "" ),
676670 artwork .get ("width" , 0 ),
677- artwork .get ("height" , 0 ),
671+ artwork .get ("height" , 0 )
678672 )
679- except Exception :
680- pass
681673
682- # ARTIST
683- try :
684- sl = item .get ("subtitleLinks" , [])[0 ]
685- result ["artist" ]["title" ] = sl .get ("title" , "" )
686- result ["artist" ]["url" ] = (
687- sl ["segue" ]["actionMetrics" ]
688- ["data" ][0 ]["fields" ]["actionUrl" ]
689- )
690- except Exception :
691- pass
692-
693- # VIDEO URL
694- try :
695- json_tag = soup .find (
696- "script" ,
697- {
698- "id" : "schema:music-video" ,
699- "type" : "application/ld+json"
700- }
701- )
702- schema_data = json .loads (json_tag .string )
703- result ["video-url" ] = schema_data ["video" ]["contentUrl" ]
704- except (AttributeError , KeyError , TypeError , json .JSONDecodeError ):
705- pass
674+ # Artist info
675+ sl = item .get ("subtitleLinks" , [{}])[0 ]
676+ result ["artist" ]["title" ] = sl .get ("title" , "" )
677+ result ["artist" ]["url" ] = (
678+ sl .get ("segue" , {})
679+ .get ("actionMetrics" , {})
680+ .get ("data" , [{}])[0 ]
681+ .get ("fields" , {})
682+ .get ("actionUrl" , "" )
683+ )
706684
707- # MORE BY ARTIST
708- try :
709- for m in more .get ("items" , []):
685+ # Video URL (from JSON-LD)
686+ schema_tag = soup .find ("script" , id = "schema:music-video" )
687+ if schema_tag :
688+ try :
689+ schema_data = json .loads (schema_tag .string )
690+ result ["video-url" ] = schema_data .get ("video" , {}).get ("contentUrl" , "" )
691+ except Exception :
692+ pass
693+
694+ # More by artist
695+ if more_sec :
696+ for m in more_sec .get ("items" , []):
710697 url = safe_action_url (m )
711698 if url :
712699 result ["more" ].append (url )
713- except Exception :
714- pass
715700
716- # SIMILAR
717- try :
718- for s in similar .get ("items" , []):
701+ # Similar videos
702+ if similar_sec :
703+ for s in similar_sec .get ("items" , []):
719704 url = safe_action_url (s )
720705 if url :
721706 result ["similar" ].append (url )
722- except Exception :
723- pass
724707
725708 return result
726709
@@ -955,6 +938,3 @@ def test_all_functions():
955938 print ("artist_scrape ERROR:" , e )
956939
957940 print ("\n === ALL TESTS COMPLETED ===" )
958-
959-
960- # test_all_functions()
0 commit comments