-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcityu.py
More file actions
49 lines (41 loc) · 1.85 KB
/
cityu.py
File metadata and controls
49 lines (41 loc) · 1.85 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
# cityu.py
# Import necessary libraries
import logging
from datetime import datetime

from bs4 import BeautifulSoup
from lxml_html_clean import Cleaner
from requests_html import HTMLSession

from common_utils import CommonUtils
# Event-calendar listing page that fetch_events() requests and scrapes.
url = "https://www.cityu.edu.hk/calendar/event"
def _child_text(node, css_class):
    """Return the stripped text of the first ``div`` with *css_class* under *node*.

    Returns an empty string when no such ``div`` exists, so one incomplete
    listing row cannot abort processing of the remaining rows.
    """
    match = node.find('div', class_=css_class)
    return match.get_text(strip=True) if match is not None else ""


def _parse_event_start(date_text, time_text):
    """Combine the listing's date and time text into the event's start datetime.

    The date text is expected to look like ``"10 Jan 2025 (Fri)"`` and the time
    text like ``"3:00 PM - 4:00 PM"``; only the part before ``" ("`` and the
    part before the first ``"-"`` are used.

    Returns:
        The parsed ``datetime``, or ``None`` when the text does not match the
        ``"%d %b %Y %I:%M %p"`` layout (for example a missing time).
    """
    day_part = date_text.split(' (')[0]
    start_part = time_text.split('-')[0].strip()
    try:
        return datetime.strptime(f"{day_part} {start_part}", "%d %b %Y %I:%M %p")
    except ValueError:
        return None


def fetch_events(*, timeout=30):
    """Download the event listing at ``url`` and print the after-work events.

    The page is parsed as static HTML (no JavaScript rendering). Every
    ``div.views-row`` is treated as one event; an event is printed when
    ``CommonUtils.is_time_after_work`` accepts its start datetime.

    Rows whose start date/time cannot be parsed are skipped with a logged
    warning instead of aborting the whole run.

    Args:
        timeout: Seconds to wait for the HTTP request before giving up.

    Raises:
        requests.exceptions.RequestException: If the request fails, times out,
            or the server answers with an HTTP error status.
    """
    session = HTMLSession()
    try:
        response = session.get(url, timeout=timeout)
        # Fail loudly on 4xx/5xx rather than scraping an error page and
        # silently reporting "no events".
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')
    finally:
        # Release the connection pool even when the request or parse fails.
        session.close()

    for event in soup.find_all('div', class_='views-row'):
        date_text = _child_text(event, 'event-period-date')
        time_text = _child_text(event, 'event-period-time')
        title = _child_text(event, 'event-title')
        venue = _child_text(event, 'event-venue')

        # The event link is the anchor inside the title div; either may be absent.
        title_node = event.find('div', class_='event-title')
        link = title_node.find('a') if title_node is not None else None
        event_url = link.get('href', '') if link is not None else ''

        event_datetime = _parse_event_start(date_text, time_text)
        if event_datetime is None:
            # Without a start time the after-work check cannot be evaluated.
            logging.getLogger(__name__).warning(
                "Skipping event %r: cannot parse start from date=%r time=%r",
                title, date_text, time_text,
            )
            continue

        if CommonUtils.is_time_after_work(event_datetime):
            print(f"Title: {title}")
            print(f"Date: {date_text}")
            print(f"Time: {time_text}")
            print(f"Venue: {venue}")
            print(f"URL: {event_url}")
            print("-" * 40)
# ...existing code...
# Run the scraper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    fetch_events()