diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..bf221b00 --- /dev/null +++ b/.gitignore @@ -0,0 +1,15 @@ +__pycache__/ +*.pyc +*.pyo + +venv/ +.venv/ +env/ + +.vscode/ +.idea/ +.DS_Store + +*.log + +.env \ No newline at end of file diff --git a/README.md b/README.md index 163d41b9..59542afd 100644 --- a/README.md +++ b/README.md @@ -1,15 +1,5 @@ # Безопасность веб-приложений. Лабораторка №2 -## Схема сдачи - -1. Получить задание -2. Сделать форк данного репозитория -3. Выполнить задание согласно полученному варианту -4. Сделать PR (pull request) в данный репозиторий -6. Исправить замечания после code review -7. Получить approve -8. Прийти на занятие и защитить работу - Что нужно проявить в работе: - умение разработать завершенное целое веб-приложение, с клиентской и серверной частями (допустимы открытые АПИ) - навыки верстки на html в объеме 200-300 тегов @@ -28,34 +18,4 @@ - справочники групп, табличные данные по расписаниям добывать с настоящего сайта на серверной стороне приложения - в клиентскую часть подгружать эти сведения динамически по JSON-API - обеспечить возможность смотреть расписания в разрезе группы или препода -- обеспечить возможность выбора учебной недели (по умолчанию выбирается автоматически) - -## Вариант 2. Аналог Прибывалки для электричек - -Сделать веб-версию Прибывалки, только для электричек - -Какие нужны возможности: -- находить желаемую ЖД-станцию поиском по названию и по карте -- отображать расписания всех проходящих поездов через выбранную станцию -- отображать расписания для поездов между двумя станциями -- работа через АПИ Яндекс.Расписаний https://yandex.ru/dev/rasp/doc/ru/ (доступ получите сами) -- хорошая работа в условиях экрана смартфона -- бонус: функция "любимых остановок" - -## Вариант 3. Прогноз погоды - -Сделать одностраничный сайт с картой, на которой можно выбрать населенный пункт и получить прогноз погоды на несколько дней по нему. 
@dataclass
class Lesson:
    """One scheduled class occupying a single time slot of a single day."""

    # Slot boundaries, kept as the text scraped from the timetable.
    time_start: str
    time_end: str

    # Discipline name and lesson-type label.
    subject: str
    lesson_type: str

    # Room text; None when the lesson has no place element.
    room: Optional[str]

    # Linked teachers / groups. The parser in this project fills these with
    # {"staff_id": ..., "name": ...} and {"group_id": ..., "name": ...} dicts.
    teachers: List[dict]
    groups: List[dict]

    # Optional subgroup marker and free-form comment.
    subgroup: Optional[str]
    comment: Optional[str]


@dataclass
class DaySchedule:
    """All lessons of one day column of the weekly grid."""

    # Column caption with the date removed.
    weekday: str

    # The same date twice: as displayed (dd.mm.yyyy) and as ISO yyyy-mm-dd.
    date: Optional[str]
    date_iso: Optional[str]

    # Lessons of the day, in the order they were appended.
    lessons: List[Lesson]


@dataclass
class WeekSchedule:
    """A whole week of lessons for one entity (a group or a teacher)."""

    # Identification of the displayed week.
    week_number: Optional[int]
    week_label: Optional[str]

    # Human-readable date range and its ISO boundaries.
    week_dates: Optional[str]
    week_start_date: Optional[str]
    week_end_date: Optional[str]

    # Week numbers for backward / forward navigation, when known.
    prev_week: Optional[int]
    next_week: Optional[int]

    # Title of the group or teacher the schedule belongs to.
    entity_name: str

    # One entry per day column.
    days: List[DaySchedule]

    # Distinct (start, end) time pairs of the grid rows.
    time_slots: List[Tuple[str, str]]
def _extract_entity_name(container) -> str:
    """Return the timetable title taken from <h2>, else <h1>, else "—"."""
    h2 = container.find("h2")
    name = h2.get_text(" ", strip=True) if h2 else ""
    if not name:
        h1 = container.find("h1")
        if h1:
            # Keep only what follows the first comma; without a comma the
            # whole heading text is used.
            name = h1.get_text(" ", strip=True).split(",", 1)[-1].strip()
    return name or "—"


def _week_from_link(container, selector: str) -> Optional[int]:
    """Return the ``selectedWeek`` number from the link matched by *selector*."""
    link = container.select_one(selector)
    if not link:
        return None
    match = re.search(r"selectedWeek=(\d+)", link.get("href", ""))
    return int(match.group(1)) if match else None


def _parse_day_header(header) -> Tuple[DaySchedule, Optional[datetime]]:
    """Build an empty DaySchedule from a day header cell.

    Returns the day together with its parsed date (None when the header
    carries no valid dd.mm.yyyy date) so the caller can compute the week range
    without parsing the same string a second time.
    """
    header_text = header.get_text(" ", strip=True)
    date_match = re.search(r"(\d{2}\.\d{2}\.\d{4})", header_text)
    date_str = date_match.group(1) if date_match else None

    parsed = None
    if date_str:
        try:
            parsed = datetime.strptime(date_str, "%d.%m.%Y")
        except ValueError:
            # Digits in the right shape but not a real calendar date:
            # keep the raw text, leave the ISO form empty.
            parsed = None

    weekday = header_text.replace(date_str, "") if date_str else header_text
    weekday = re.sub(r"\s+", " ", weekday.strip()) or "—"

    day = DaySchedule(
        weekday=weekday,
        date=date_str,
        date_iso=parsed.date().isoformat() if parsed else None,
        lessons=[],
    )
    return day, parsed


def _collect_links(block, param: str, id_key: str) -> List[dict]:
    """Collect ``{id_key: int | None, "name": str}`` for links carrying *param*.

    *block* may be None (element absent), which yields an empty list. Links
    without visible text are skipped; a link whose href has no numeric id is
    kept with ``None`` as its id.
    """
    if not block:
        return []
    id_pattern = re.compile(rf"{param}=(\d+)")
    found = []
    for link in block.select(f'a[href*="{param}="]'):
        name = link.get_text(" ", strip=True)
        if not name:
            continue
        match = id_pattern.search(link.get("href", ""))
        found.append({id_key: int(match.group(1)) if match else None, "name": name})
    return found


def _parse_lesson(lesson_elem, time_start: str, time_end: str) -> Lesson:
    """Convert one ``.schedule__lesson`` element into a Lesson."""
    type_elem = (lesson_elem.select_one(".schedule__lesson-type-chip")
                 or lesson_elem.select_one(".schedule__lesson-type"))
    subject_elem = (lesson_elem.select_one(".schedule__discipline")
                    or lesson_elem.select_one(".schedule__discipline-name"))
    room_elem = lesson_elem.select_one(".schedule__place")

    subgroup = None
    for span in lesson_elem.select("span.caption-text"):
        text = span.get_text(" ", strip=True)
        if not text or "подгрупп" not in text.lower():
            continue
        # Prefer the value after a colon; otherwise fall back to the last
        # word. A later matching caption overrides an earlier one.
        match = re.search(r":\s*(.+)$", text)
        subgroup = match.group(1).strip() if match else text.split()[-1].strip()

    comment = None
    comment_elem = lesson_elem.select_one(".schedule__comment")
    if comment_elem:
        comment = comment_elem.get_text(" ", strip=True)
        # A comment that mentions a subgroup is not reported as a comment.
        if comment and "подгрупп" in comment.lower():
            comment = None

    return Lesson(
        time_start=time_start,
        time_end=time_end,
        subject=subject_elem.get_text(" ", strip=True) if subject_elem else "—",
        lesson_type=type_elem.get_text(" ", strip=True) if type_elem else "—",
        room=room_elem.get_text(" ", strip=True) if room_elem else None,
        teachers=_collect_links(lesson_elem.select_one(".schedule__teacher"),
                                "staffId", "staff_id"),
        groups=_collect_links(lesson_elem.select_one(".schedule__groups"),
                              "groupId", "group_id"),
        subgroup=subgroup,
        comment=comment,
    )


def parse_schedule(html: str) -> dict:
    """Parse a timetable HTML page into a JSON-serializable dict.

    Args:
        html: Full HTML text of a schedule page.

    Returns:
        A dict with the keys ``week_number``, ``week_label``, ``week_dates``,
        ``week_start_date``, ``week_end_date``, ``prev_week``, ``next_week``,
        ``entity_name``, ``days`` (list of day dicts, each holding its
        ``lessons``) and ``time_slots`` (list of
        ``{"time_start", "time_end"}`` dicts).

    Raises:
        ValueError: If the timetable container, the ``schedule__items`` grid,
            its rows, or the day headers cannot be found.
    """
    # Local import so the block does not depend on a file-level import change.
    from dataclasses import asdict

    soup = BeautifulSoup(html, "lxml")

    container = soup.select_one("div.container.timetable")
    if not container:
        container = soup.find("div", class_=re.compile(r"\btimetable\b"))
    if not container:
        raise ValueError("Не найден контейнер с расписанием")

    entity_name = _extract_entity_name(container)

    week_label = None
    week_elem = container.select_one(".week-nav-current_week")
    if week_elem:
        week_label = week_elem.get_text(" ", strip=True)

    week_number = None
    if week_label:
        match = re.search(r"(\d+)", week_label)
        if match:
            week_number = int(match.group(1))

    prev_week = _week_from_link(container, ".week-nav-prev")
    next_week = _week_from_link(container, ".week-nav-next")

    schedule_grid = (container.select_one(".schedule .schedule__items")
                     or container.select_one(".schedule__items"))
    if not schedule_grid:
        raise ValueError("Не найден блок schedule__items")

    rows = schedule_grid.find_all(recursive=False)
    if not rows:
        raise ValueError("Пустой блок расписания")

    # The grid starts with a run of header cells: the first one is skipped,
    # the remaining ones are the day columns.
    row_idx = 0
    while row_idx < len(rows) and "schedule__head" in (rows[row_idx].get("class") or []):
        row_idx += 1
    if row_idx < 2:
        raise ValueError("Не удалось найти заголовки дней недели")

    days: List[DaySchedule] = []
    parsed_dates: List[datetime] = []
    for header in rows[1:row_idx]:
        day, parsed = _parse_day_header(header)
        days.append(day)
        if parsed is not None:
            parsed_dates.append(parsed)

    time_slots: List[Tuple[str, str]] = []

    # After the headers the grid is a flat sequence: one time cell followed
    # by one cell per day, repeated for every slot.
    while row_idx < len(rows):
        time_row = rows[row_idx]
        row_idx += 1

        if "schedule__time" not in (time_row.get("class") or []):
            continue

        times = [
            text
            for text in (item.get_text(" ", strip=True)
                         for item in time_row.select(".schedule__time-item"))
            if re.search(r"\d{1,2}:\d{2}", text)
        ]
        time_start, time_end = (times[0], times[1]) if len(times) >= 2 else ("", "")

        # Record the slot unless it merely repeats the previous one.
        if time_start and time_end and (
            not time_slots or time_slots[-1] != (time_start, time_end)
        ):
            time_slots.append((time_start, time_end))

        for day in days:
            if row_idx >= len(rows):
                break
            cell = rows[row_idx]
            row_idx += 1

            for lesson_elem in cell.find_all("div", class_="schedule__lesson", recursive=False):
                day.lessons.append(_parse_lesson(lesson_elem, time_start, time_end))

    week_dates = None
    week_start = None
    week_end = None
    if parsed_dates:
        first_day = min(parsed_dates)
        last_day = max(parsed_dates)
        week_start = first_day.date().isoformat()
        week_end = last_day.date().isoformat()
        week_dates = f"{first_day.strftime('%d.%m.%Y')} - {last_day.strftime('%d.%m.%Y')}"

    return {
        "week_number": week_number,
        # week_number is only ever set when week_label is non-empty, so there
        # is nothing to synthesize a label from when the label is missing.
        "week_label": week_label or None,
        "week_dates": week_dates,
        "week_start_date": week_start,
        "week_end_date": week_end,
        "prev_week": prev_week,
        "next_week": next_week,
        "entity_name": entity_name,
        # asdict() emits the dataclass fields in declaration order, nested
        # lessons included.
        "days": [asdict(day) for day in days],
        "time_slots": [{"time_start": start, "time_end": end} for start, end in time_slots],
    }
SSAU_BASE = "https://ssau.ru"


class SsauClient:
    """HTTP client that scrapes schedule data from ``SSAU_BASE``.

    One ``requests.Session`` is reused for every call, and the CSRF token
    needed by the search endpoint is fetched once and cached.
    """

    def __init__(self, timeout_seconds: float = 25.0):
        """Create the session.

        Args:
            timeout_seconds: Timeout passed to every HTTP request.
        """
        self._session = requests.Session()
        self._session.headers.update({
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
            "Accept": "text/html,application/json;q=0.9,*/*;q=0.8",
            "Accept-Language": "ru-RU,ru;q=0.9,en;q=0.5",
        })
        self._timeout = timeout_seconds
        self._cached_csrf: Optional[str] = None

    def fetch_html(self, url: str) -> str:
        """GET *url* and return the response body as text.

        Raises:
            requests.HTTPError: If the server answers with an error status.
        """
        response = self._session.get(url, timeout=self._timeout)
        response.raise_for_status()
        return response.text

    def _fetch_soup(self, path: str):
        """Fetch *path* (relative to ``SSAU_BASE``) and parse it with lxml."""
        return BeautifulSoup(self.fetch_html(urljoin(SSAU_BASE, path)), "lxml")

    def _fetch_schedule(self, id_param: str, entity_id: int, week: int) -> str:
        """Fetch ``/rasp?<id_param>=<entity_id>``; week 0 means "no week filter"."""
        url = urljoin(SSAU_BASE, f"/rasp?{id_param}={entity_id}")
        if week != 0:
            url += f"&selectedWeek={week}"
        return self.fetch_html(url)

    def _ensure_csrf_token(self) -> str:
        """Return the CSRF token, fetching it from ``/rasp`` on first use.

        Raises:
            RuntimeError: If the page contains no ``csrf-token`` meta tag in
                either quoting style.
        """
        if self._cached_csrf:
            return self._cached_csrf

        html = self.fetch_html(urljoin(SSAU_BASE, "/rasp"))

        # Double-quoted attributes first, single-quoted as a fallback.
        match = None
        for pattern in (
            r'name="csrf-token"\s+content="([^"]+)"',
            r"name='csrf-token'\s+content='([^']+)'",
        ):
            match = re.search(pattern, html, re.IGNORECASE)
            if match:
                break
        if not match:
            raise RuntimeError("Не удалось найти csrf-token на странице /rasp")

        self._cached_csrf = match.group(1)
        return self._cached_csrf

    def get_institutes(self) -> List[Tuple[int, str]]:
        """Return ``(faculty_id, name)`` pairs sorted case-insensitively by name.

        Only the first link seen for each faculty id is kept.
        """
        soup = self._fetch_soup("/rasp")
        institutes: Dict[int, Tuple[int, str]] = {}

        for anchor in soup.select('a[href*="/rasp/faculty/"]'):
            href = anchor.get("href")
            if not href:
                continue

            match = re.search(r"/rasp/faculty/(\d+)", urljoin(SSAU_BASE, href))
            if not match:
                continue

            faculty_id = int(match.group(1))
            name = anchor.get_text(" ", strip=True)
            if name and faculty_id not in institutes:
                institutes[faculty_id] = (faculty_id, name)

        return sorted(institutes.values(), key=lambda pair: pair[1].lower())

    def get_available_courses(self, faculty_id: int) -> List[int]:
        """Return the sorted course numbers (1-5) linked from a faculty page.

        Falls back to ``[1, 2, 3, 4, 5]`` when the page links to no course.
        """
        soup = self._fetch_soup(f"/rasp/faculty/{faculty_id}?course=1")
        courses: set = set()

        for anchor in soup.select('a[href*="course="]'):
            href = anchor.get("href") or ""
            params = parse_qs(urlparse(urljoin(SSAU_BASE, href)).query)
            if "course" not in params:
                continue
            try:
                course_num = int(params["course"][0])
            except (ValueError, TypeError):
                # Non-numeric course value: ignore this link.
                continue
            if 1 <= course_num <= 5:
                courses.add(course_num)

        if not courses:
            courses = {1, 2, 3, 4, 5}

        return sorted(courses)

    def get_groups_by_course(self, faculty_id: int, course: int) -> List[Tuple[int, str]]:
        """Return ``(group_id, name)`` pairs of one faculty course, sorted by name.

        Only the first link seen for each group id is kept.
        """
        soup = self._fetch_soup(f"/rasp/faculty/{faculty_id}?course={course}")
        groups: Dict[int, Tuple[int, str]] = {}

        for anchor in soup.select('a[href*="groupId="]'):
            href = anchor.get("href")
            if not href:
                continue

            params = parse_qs(urlparse(urljoin(SSAU_BASE, href)).query)
            if "groupId" not in params:
                continue

            try:
                group_id = int(params["groupId"][0])
            except (ValueError, TypeError):
                # Non-numeric id: ignore this link.
                continue

            name = anchor.get_text(" ", strip=True)
            if name and group_id not in groups:
                groups[group_id] = (group_id, name)

        return sorted(groups.values(), key=lambda pair: pair[1])

    def search_teachers(self, query: str) -> List[dict]:
        """Search by *query* and return the teacher hits.

        Returns:
            A list of ``{"id": ..., "name": ...}`` dicts, one per result whose
            ``url`` contains ``staffId=``. Results with a missing or null
            ``url`` and entries that are not objects are skipped; a payload
            that is not a list yields an empty list.

        Raises:
            requests.HTTPError: If the search request is rejected.
        """
        csrf = self._ensure_csrf_token()

        response = self._session.post(
            urljoin(SSAU_BASE, "/rasp/search"),
            data={"text": query},
            headers={
                "X-CSRF-TOKEN": csrf,
                "Content-Type": "application/x-www-form-urlencoded",
                "X-Requested-With": "XMLHttpRequest",
            },
            timeout=self._timeout,
        )
        try:
            response.raise_for_status()
        except requests.HTTPError:
            # NOTE(review): a rejection may mean the cached token went stale
            # (unverified). Dropping it makes the next call fetch a fresh one
            # instead of failing forever.
            self._cached_csrf = None
            raise

        data = response.json()
        if not isinstance(data, list):
            return []

        teachers = []
        for item in data:
            if not isinstance(item, dict):
                continue
            # "url" may be absent or null; treat both as "not a teacher".
            if "staffId=" in (item.get("url") or ""):
                teachers.append({
                    "id": item.get("id"),
                    "name": item.get("text", ""),
                })

        return teachers

    def get_schedule_html(self, group_id: int, week: int = 0) -> str:
        """Return the schedule page HTML of a group (week 0 = default week)."""
        return self._fetch_schedule("groupId", group_id, week)

    def get_teacher_schedule_html(self, staff_id: int, week: int = 0) -> str:
        """Return the schedule page HTML of a teacher (week 0 = default week)."""
        return self._fetch_schedule("staffId", staff_id, week)
"""FastAPI entry point: JSON API over the SSAU schedule scraper plus static files."""
import logging
import os
from typing import Optional

import uvicorn
from fastapi import FastAPI, HTTPException, Query
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import RedirectResponse
from fastapi.staticfiles import StaticFiles

from app.parser import parse_schedule
from app.ssau_client import SsauClient

logger = logging.getLogger(__name__)

app = FastAPI()

app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)

client = SsauClient()

# NOTE: the handlers that call `client` are plain `def`, not `async def`.
# The client performs blocking HTTP requests; FastAPI runs sync handlers in
# its threadpool, so a slow upstream page no longer stalls the event loop.


@app.get("/api/institutes")
def get_institutes():
    """Return ``{"institutes": [{"id", "name"}, ...]}``; empty list on failure."""
    try:
        institutes = client.get_institutes()
    except Exception:
        # Best effort by design: the UI receives an empty list, the cause is logged.
        logger.exception("Ошибка")
        return {"institutes": []}
    return {"institutes": [{"id": inst_id, "name": name} for inst_id, name in institutes]}


@app.get("/api/groups")
def get_groups(
    # Required, like group_id / staff_id below: a missing id used to be sent
    # upstream as the literal "/rasp/faculty/None".
    institute_id: int = Query(...),
    course: Optional[int] = Query(None),
):
    """Return the courses of an institute, or the groups of one of its courses.

    Without ``course`` the response is ``{"available_courses": [...]}``
    (``[1..5]`` on failure); with it, ``{"groups": [{"id", "name"}, ...]}``
    (empty list on failure).
    """
    if course is None:
        try:
            return {"available_courses": client.get_available_courses(institute_id)}
        except Exception:
            logger.exception("Ошибка получения курсов")
            return {"available_courses": [1, 2, 3, 4, 5]}

    try:
        groups = client.get_groups_by_course(institute_id, course)
    except Exception:
        logger.exception("Ошибка получения групп")
        return {"groups": []}
    return {"groups": [{"id": group_id, "name": name} for group_id, name in groups]}


@app.get("/api/teachers")
def search_teachers(q: str = Query("", min_length=2)):
    """Return ``{"teachers": [...]}`` for a search string; empty list on failure."""
    # The default "" is what arrives when the parameter is omitted.
    if len(q) < 2:
        return {"teachers": []}

    try:
        return {"teachers": client.search_teachers(q)}
    except Exception:
        logger.exception("Ошибка поиска")
        return {"teachers": []}


def _schedule_response(fetch_html, entity_id: int, week: int) -> dict:
    """Fetch a schedule page with *fetch_html* and parse it.

    Raises:
        HTTPException: 500 carrying the error text when fetching or parsing fails.
    """
    try:
        return parse_schedule(fetch_html(entity_id, week))
    except Exception as exc:
        logger.exception("Ошибка получения расписания")
        raise HTTPException(status_code=500, detail=str(exc)) from exc


@app.get("/api/schedule/group")
def get_schedule_group(
    group_id: int = Query(..., description="ID группы"),
    week: int = Query(0, description="Номер недели (0 - текущая)"),
):
    """Return the parsed weekly schedule of a group."""
    return _schedule_response(client.get_schedule_html, group_id, week)


@app.get("/api/schedule/teacher")
def get_schedule_teacher(
    staff_id: int = Query(..., description="ID преподавателя"),
    week: int = Query(0, description="Номер недели (0 - текущая)"),
):
    """Return the parsed weekly schedule of a teacher."""
    return _schedule_response(client.get_teacher_schedule_html, staff_id, week)


BASE_DIR = os.path.dirname(os.path.abspath(__file__))
STATIC_DIR = os.path.join(BASE_DIR, "static")

# exist_ok avoids the check-then-create race of a separate os.path.exists test.
os.makedirs(STATIC_DIR, exist_ok=True)

app.mount("/static", StaticFiles(directory=STATIC_DIR, html=True), name="static")


@app.get("/")
async def root():
    """Redirect the site root to the static single-page UI."""
    return RedirectResponse(url="/static/index.html")


if __name__ == "__main__":
    print("Сервер запущен: http://127.0.0.1:8000")
    uvicorn.run(app, host="0.0.0.0", port=8000)
+ + +| Время | '; + $.each(days, function(i, day) { + html += '' + day.weekday + (day.date ? ' ' + day.date + '' : '') + ' | ';
+ });
+ html += '|
|---|---|---|
| ' + timeStr + ' | '; + + $.each(days, function(j, day) { + let lessons = $.grep(day.lessons, function(l) { + return l.time_start === slot.time_start; + }); + + if (lessons.length) { + html += '';
+ $.each(lessons, function(k, lesson) {
+ html += ' ';
+ html += ' ';
+ });
+ html += '' + escapeHtml(lesson.subject) + ' ';
+ if (lesson.lesson_type) html += '' + escapeHtml(lesson.lesson_type) + '';
+ if (lesson.room) html += 'ауд. ' + escapeHtml(lesson.room) + ' ';
+ if (lesson.teachers && lesson.teachers.length) {
+ html += '' + escapeHtml(lesson.teachers[0].name) + ' ';
+ }
+ html += ' | ';
+ } else {
+ html += '— | '; + } + }); + html += '