-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathdbscrape.py
More file actions
71 lines (51 loc) · 2.62 KB
/
dbscrape.py
File metadata and controls
71 lines (51 loc) · 2.62 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
# Requires the BeautifulSoup4 (bs4), requests, and MySQL Connector/Python (mysql.connector) packages to run.
from bs4 import BeautifulSoup
import requests
import mysql.connector
# Page listing every course section together with its seat counts.
SCHEDULE_URL = "https://apps.concord.edu/schedules/seatstaken.php"

# Seconds to wait for the schedule server before giving up; without a timeout
# requests.get() can block forever on a stalled connection.
REQUEST_TIMEOUT = 30

# Connection settings for the target MySQL database. The values are
# placeholders and must be filled in before the script is run.
DB_CONFIG = {
    "user": "",
    "host": "",
    "port": "",
    "password": "",
    "database": "",
}

# A complete course row is read at cell indexes 0 through 17.
EXPECTED_CELLS = 18

DELETE_COURSES = "DELETE FROM courses "

ADD_COURSE = (
    "INSERT INTO courses "
    "(CRN, Subject, CRS, Section, Title, CH, MaxSeats, Enrolled, AvailableSeats, WL, "
    "Days, STIME, ETIME, Building, Room, WK, Instructor, EF, StartDate) "
    "VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
)


def parse_max_seats(text):
    """Return the MaxSeats value extracted from the text of the seats cell.

    When the text contains a ':' the result is the part between the first
    ':' and the following 'B'. Text without a ':' (for example the plain
    "0" the page uses) is returned unchanged instead of raising IndexError.
    """
    if ":" not in text:
        return text
    return text.split(":")[1].split("B")[0]


def split_building_room(text):
    """Split the location cell text into a ``(building, room)`` pair.

    The first whitespace-separated token is the building and the second,
    with any parentheses removed, is the room. A missing token yields an
    empty string rather than an IndexError.
    """
    parts = text.split()
    building = parts[0] if parts else ""
    room = parts[1].replace("(", "").replace(")", "") if len(parts) > 1 else ""
    return building, room


def course_values(texts):
    """Build the 19-value parameter tuple for ADD_COURSE from one row.

    ``texts`` holds the text of the row's cells; indexes 0-17 are used.
    Cell 6 is reduced to the seat count and cell 13 is split into the
    separate Building and Room columns; every other cell is passed through.
    """
    building, room = split_building_room(texts[13])
    return (
        tuple(texts[:6])
        + (parse_max_seats(texts[6]),)
        + tuple(texts[7:13])
        + (building, room)
        + tuple(texts[14:18])
    )


def scrape_courses(html):
    """Parse the schedule page and return one parameter tuple per course row.

    Every ``<table id="classtable">`` is scanned. Rows without ``<td>``
    cells and rows whose first cell is the "CRN" header are ignored. Rows
    with fewer than EXPECTED_CELLS cells are reported and skipped instead
    of aborting the run with an IndexError.
    """
    soup = BeautifulSoup(html, "html.parser")
    courses = []
    for table in soup.find_all("table", id="classtable"):
        for row in table.find_all("tr"):
            texts = [cell.get_text() for cell in row.find_all("td")]
            if not texts or texts[0] == "CRN":
                continue
            if len(texts) < EXPECTED_CELLS:
                print("Skipped row with only {0} cells".format(len(texts)))
                continue
            courses.append(course_values(texts))
    return courses


def main():
    """Replace the contents of the ``courses`` table with a fresh scrape.

    The page is downloaded and fully parsed before the database is touched,
    so a network or parsing failure cannot leave the table emptied. The
    delete and all inserts are committed together; on any error the
    transaction is rolled back and the cursor and connection are closed.

    Raises:
        requests.HTTPError: the schedule page returned an error status.
        RuntimeError: the page contained no course rows.
    """
    response = requests.get(SCHEDULE_URL, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    courses = scrape_courses(response.text)
    if not courses:
        # An empty result most likely means the page layout changed; refuse
        # to wipe the existing data in that case.
        raise RuntimeError(
            "No course rows found at {0}; courses table left unchanged".format(SCHEDULE_URL)
        )

    connection = mysql.connector.connect(**DB_CONFIG)
    try:
        cursor = connection.cursor()
        try:
            cursor.execute(DELETE_COURSES)
            for values in courses:
                cursor.execute(ADD_COURSE, values)
                # Positions in the parameter tuple: CRN, Subject, Title, Instructor.
                crn, subject, title, instructor = values[0], values[1], values[4], values[16]
                print("Added {0:7} {1:7} {2:30} {3:10}".format(crn, subject, title, instructor))
            connection.commit()
        except Exception:
            # Undo the DELETE and any partial inserts before propagating.
            connection.rollback()
            raise
        finally:
            cursor.close()
    finally:
        connection.close()


if __name__ == "__main__":
    main()