This repository was archived by the owner on Jul 8, 2025. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathparser.py
More file actions
109 lines (88 loc) · 4.44 KB
/
parser.py
File metadata and controls
109 lines (88 loc) · 4.44 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
import requests
from bs4 import BeautifulSoup
import re
import json
# Common prefix of every page this module requests; the relative page path
# is concatenated directly onto it.
URL = "https://pronew.chenk.ru/blocks/manage_groups/website/"

# Relative list-page path for each complex, keyed by the complex's name.
# Presumably these values are what get_courses() receives -- confirm with callers.
COMPLEX = {
    "Блюхера": "list.php?id=1",
    "Российская": "list.php?id=3",
}
def get_courses(complex, timeout=10):
    """Fetch a list page and return all courses and their groups as JSON.

    Args:
        complex: Relative page path appended to ``URL`` (presumably one of
            the values stored in ``COMPLEX``). The name shadows the builtin
            ``complex`` but is kept so keyword callers continue to work.
        timeout: Seconds to wait for the server; without a timeout
            ``requests`` would block indefinitely on a stalled connection.

    Returns:
        A JSON object string mapping each year label (colons removed) to an
        object that maps group name to the group's link. Year blocks without
        a label and groups without a link are skipped rather than aborting
        the whole parse.

    Raises:
        requests.exceptions.RequestException: If the request fails or
            times out.
    """
    response = requests.get(URL + complex, timeout=timeout)
    soup = BeautifulSoup(response.text, 'html.parser')

    course_dict = {}
    for container in soup.find_all('div', class_='spec-year-block-container'):
        for year_block in container.find_all('div', class_='spec-year-block'):
            name_tag = year_block.find('span', class_='spec-year-name')
            if name_tag is None:
                # find() returns None when the tag is absent; skip the block
                # instead of failing on ``None.text``.
                continue
            year_name = name_tag.text.strip().replace(":", '')
            groups = course_dict.setdefault(year_name, {})

            for group in year_block.find_all('span', class_='group-block'):
                link_tag = group.find('a')
                href = link_tag.get('href') if link_tag is not None else None
                if href is None:
                    continue
                groups[link_tag.text.strip()] = href.strip()
    return json.dumps(course_dict)
def _parse_disc_block(disc_block):
    """Return ``(name, {teacher: classroom})`` for one discipline block.

    ``name`` is the ``title`` of the first ``<span>`` in the block's
    ``discHeader`` with parenthesised parts removed, or ``None`` when the
    header, span or title is missing.
    """
    header = disc_block.find('div', class_='discHeader')
    span = header.find('span') if header is not None else None
    title = span.get('title') if span is not None else None
    name = re.sub(r'\(.*?\)', '', title).strip() if title is not None else None

    rooms = {}
    for subgroup in disc_block.find_all('div', class_='discSubgroup'):
        teacher_tag = subgroup.find('div', class_='discSubgroupTeacher')
        if teacher_tag is None:
            continue
        room_tag = subgroup.find('div', class_='discSubgroupClassroom')
        classroom = room_tag.text.strip() if room_tag is not None else ''
        # "???" is the page's placeholder for an unknown classroom.
        rooms[teacher_tag.text.strip()] = classroom.replace("???", '')
    return name, rooms


def get_schedule(group, timeout=10):
    """Fetch the timetable page of a group and return it as a JSON string.

    Args:
        group: Relative page path appended to ``URL``.
        timeout: Seconds to wait for the server; without a timeout
            ``requests`` would block indefinitely on a stalled connection.

    Returns:
        A JSON object string. On HTTP 200 it maps each day header to an
        object keyed by lesson number, whose values hold ``time_start``,
        ``time_finish`` and ``lessons`` (``{lesson name: {teacher:
        classroom}}``). Lessons without a name are omitted. If the page has
        no timetable container the object is empty. On any other status code
        the object has a single ``"Ошибка"`` key with the status message.

    Raises:
        requests.exceptions.RequestException: If the request fails or
            times out.
    """
    response = requests.get(URL + group, timeout=timeout)
    schedule_dict = {}
    if response.status_code != 200:
        schedule_dict['Ошибка'] = f"Ошибка при запросе: {response.status_code}"
        return json.dumps(schedule_dict)

    soup = BeautifulSoup(response.text, 'html.parser')
    week = soup.find('div', class_='timetableContainer')  # the whole week
    if week is None:
        return json.dumps(schedule_dict)

    for day in week.find_all('td', attrs={'style': True}):  # every day cell
        header = day.find('div', class_='dayHeader')
        day_body = day.find('div', attrs={'style': 'padding-left: 6px;'})
        if header is None or day_body is None:
            # Not a day cell (or malformed); skip it rather than crash.
            continue

        lessons_list = {}
        counter = 0  # running lesson number, used when the page shows none
        for lesson in day_body.find_all('div', class_='lessonBlock'):
            counter += 1
            time_tag = lesson.find('div', class_='lessonTimeBlock')
            if time_tag is None:
                continue

            # Expected layout: number, start time, finish time -- one per line.
            parts = [part.strip() for part in time_tag.text.strip().split('\n')]
            lesson_number = parts[0]
            if len(parts) >= 3:
                time_start, time_finish = parts[1], parts[2]
            else:
                time_start = time_finish = "???"

            lesson_name = None
            lessons = {}
            for disc_block in lesson.find_all('div', class_='discBlock'):
                if 'cancelled' in disc_block.get('class', []):
                    continue
                # NOTE(review): each non-cancelled block replaces the previous
                # one, so only the last block of a lesson is reported. This
                # mirrors the existing behaviour -- confirm it is intended.
                lesson_name, rooms = _parse_disc_block(disc_block)
                lessons = {lesson_name: rooms}

            if not lesson_name:
                continue

            if lesson_number == "??-??":
                # Existing heuristic: an unnumbered first block is treated as
                # lesson 5. NOTE(review): the reason is not visible here.
                if counter == 1:
                    counter = 5
                # Keep keys as strings so an inferred number cannot coexist
                # with an equal string key and yield duplicate JSON keys.
                lesson_number = str(counter)

            lessons_list[lesson_number] = {
                "time_start": time_start,
                "time_finish": time_finish,
                "lessons": lessons,
            }
            try:
                # Re-sync the counter so the next unnumbered lesson follows on.
                counter = int(lesson_number)
            except ValueError:
                # Non-numeric label: keep the running count instead of failing.
                pass

        schedule_dict[header.text.strip()] = lessons_list
    return json.dumps(schedule_dict)