diff --git a/.cspell.json b/.cspell.json
index f4a0a683..234c79c0 100644
--- a/.cspell.json
+++ b/.cspell.json
@@ -21,12 +21,14 @@
     "pkgs",
     "psql",
     "qiita",
+    "replacen",
     "reqwest",
     "rustc",
     "safify",
     "stdenv",
     "supabase",
-    "swiper"
+    "swiper",
+    "zenki"
   ],
   "dictionaries": [
     "softwareTerms",
@@ -52,6 +54,7 @@
     "**/*.svg",
     "**/migration.sql",
     "**/data.json",
+    "**/server/src/seeds/json/**",
     "**/Cargo.*",
     "scraper/target",
     "**/rust-toolchain.toml",
diff --git a/.gitignore b/.gitignore
index 0535f467..1a29dfcb 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,6 +3,9 @@
 /.direnv
 /.husky
 
+.cache
+data.json
+
 # Logs
 logs
 *.log
diff --git a/biome.json b/biome.json
index 3ff85b17..52e82609 100644
--- a/biome.json
+++ b/biome.json
@@ -22,6 +22,7 @@
       "bun.lockb",
       "server/target",
       "data.json",
+      "server/src/seeds/json",
       "scraper/target",
       ".next",
       "next-env.d.ts",
diff --git a/flake.lock b/flake.lock
index 8b9b19c0..4b3fd8ff 100644
--- a/flake.lock
+++ b/flake.lock
@@ -38,11 +38,11 @@
     },
     "nixpkgs": {
       "locked": {
-        "lastModified": 1739262174,
-        "narHash": "sha256-W0436s/nXInSuxfiXKtT2n1nUkNw8Ibddz7w4GAweJ4=",
+        "lastModified": 1743501102,
+        "narHash": "sha256-7PCBQ4aGVF8OrzMkzqtYSKyoQuU2jtpPi4lmABpe5X4=",
         "owner": "NixOS",
         "repo": "nixpkgs",
-        "rev": "2a39e74c3ea50a164168215a47b86f72180db76c",
+        "rev": "02f2af8c8a8c3b2c05028936a1e84daefa1171d4",
         "type": "github"
       },
       "original": {
@@ -54,11 +54,11 @@
     },
     "nixpkgs-unstable": {
       "locked": {
-        "lastModified": 1739019272,
-        "narHash": "sha256-7Fu7oazPoYCbDzb9k8D/DdbKrC3aU1zlnc39Y8jy/s8=",
+        "lastModified": 1743472173,
+        "narHash": "sha256-xwNv3FYTC5pl4QVZ79gUxqCEvqKzcKdXycpH5UbYscw=",
         "owner": "NixOS",
         "repo": "nixpkgs",
-        "rev": "fa35a3c8e17a3de613240fea68f876e5b4896aec",
+        "rev": "88e992074d86ad50249de12b7fb8dbaadf8dc0c5",
         "type": "github"
       },
       "original": {
@@ -106,11 +106,11 @@
         ]
       },
       "locked": {
-        "lastModified": 1739240901,
-        "narHash": "sha256-YDtl/9w71m5WcZvbEroYoWrjECDhzJZLZ8E68S3BYok=",
+        "lastModified": 1743475035,
+        "narHash": "sha256-uLjVsb4Rxnp1zmFdPCDmdODd4RY6ETOeRj0IkC0ij/4=",
         "owner": "oxalica",
         "repo": "rust-overlay",
-        "rev": "03473e2af8a4b490f4d2cdb2e4d3b75f82c8197c",
+        "rev": "bee11c51c2cda3ac57c9e0149d94b86cc1b00d13",
         "type": "github"
       },
       "original": {
diff --git a/flake.nix b/flake.nix
index 79e4ae47..836c2d26 100644
--- a/flake.nix
+++ b/flake.nix
@@ -32,7 +32,7 @@
       };
       unstable = nixpkgs-unstable.legacyPackages.${system};
 
-      rust-bin = pkgs.rust-bin.fromRustupToolchainFile ./scraper/rust-toolchain.toml;
+      rust-bin = pkgs.rust-bin.beta.latest.default; # pkgs.rust-bin.fromRustupToolchainFile ./scraper/rust-toolchain.toml;
 
       prisma = pkgs.callPackage ./server/prisma.nix {inherit prisma-utils;};
       common = {
@@ -62,8 +62,7 @@
       };
     in {
       packages.scraper = pkgs.callPackage ./scraper {toolchain = rust-bin;};
-      devShells.default = pkgs.mkShell common;
-      devShells.scraper = pkgs.mkShell {
+      devShells.default = pkgs.mkShell {
         inherit (common) env;
         packages =
           common.packages
diff --git a/scraper/sample.ts b/scraper/sample.ts
new file mode 100644
index 00000000..ea8b12ba
--- /dev/null
+++ b/scraper/sample.ts
@@ -0,0 +1,14 @@
+[
+  {
+    name: "zenki",
+    courses: [
+      {
+        name: "数理科学基礎",
+        teacher: "(人名)",
+        semester: "S1",
+        period: "月曜2限、水曜1限",
+        code: "30003 CAS-FC1871L1",
+      },
+    ],
+  },
+];
diff --git a/scraper/src/io.rs b/scraper/src/io.rs
index b6b3cba2..14fb798e 100644
--- a/scraper/src/io.rs
+++ b/scraper/src/io.rs
@@ -1,6 +1,5 @@
 use crate::types::*;
 use anyhow::ensure;
-use sha2::{Digest, Sha256};
 use tokio::fs;
 use tokio::io::AsyncWriteExt;
 
@@ -10,13 +9,20 @@ pub async fn write_to(file: &mut fs::File, content: Entry) -> anyhow::Result<()>
     Ok(())
 }
 
-use crate::CACHE_DIR;
+use crate::cache_dir;
 
 pub async fn request(url: &str) -> anyhow::Result<String> {
     println!("[request] sending request to {}", url);
 
-    let hash = Sha256::digest(url.as_bytes());
-    let path = format!("{CACHE_DIR}/{:x}", hash);
+    let cache_key = url
+        .to_string()
+        .replacen("/", "_", 1000)
+        .replacen(":", "_", 1000)
+        .replacen("?", "_", 1000)
+        .replacen("&", "_", 1000)
+        .replacen("=", "_", 1000)
+        .to_string();
+    let path = format!("{}/{cache_key}", cache_dir());
     if let Ok(bytes) = fs::read(&path).await {
         if let Ok(text) = String::from_utf8(bytes) {
             return Ok(text);
diff --git a/scraper/src/main.rs b/scraper/src/main.rs
index d3fd67b6..e7a5a76b 100644
--- a/scraper/src/main.rs
+++ b/scraper/src/main.rs
@@ -16,13 +16,16 @@ use scraper::{Html, Selector};
 use urls::URLS;
 
 const RESULT_FILE: &str = "./data.json";
-const CACHE_DIR: &str = "./.cache";
+
+fn cache_dir() -> String {
+    "./.cache".to_string()
+}
 
 #[tokio::main(flavor = "multi_thread")]
 async fn main() {
     println!("[log] starting...");
 
-    let _ = fs::DirBuilder::new().create(CACHE_DIR).await;
+    let _ = fs::DirBuilder::new().create(cache_dir()).await;
 
     let mut file = fs::File::create(RESULT_FILE)
         .await
@@ -59,7 +62,8 @@ async fn get_courses_of(base_url: &str) -> Vec<Course> {
     futures::future::join_all(courses)
         .await
         .into_iter()
-        .collect::<Vec<_>>()
+        .flatten()
+        .collect()
 }
 
 lazy_static! {
diff --git a/scraper/src/parser.rs b/scraper/src/parser.rs
index f3214ed2..fef9ef0c 100644
--- a/scraper/src/parser.rs
+++ b/scraper/src/parser.rs
@@ -1,6 +1,6 @@
 use anyhow::anyhow;
 use lazy_static::lazy_static;
-use scraper::{Html, Selector};
+use scraper::{ElementRef, Html, Selector};
 
 use crate::types::*;
 
@@ -17,19 +17,24 @@ lazy_static! {
         Selector::parse(".catalog-page-detail-table-cell.code-cell").unwrap();
 }
 
-pub fn parse_course_info(html: Html) -> anyhow::Result<Course> {
-    Ok(Course {
-        name: select(&html, &NAME_SELECTOR, 1)?,
-        teacher: select(&html, &TEACHER_SELECTOR, 1)?,
-        semester: select_all(&html, &SEMESTER_SELECTOR, 1)?.join(","),
-        period: select(&html, &PERIOD_SELECTOR, 1)?,
-        code: select_all(&html, &CODE_SELECTOR, 1)?.join(" "),
-    })
+pub fn parse_course_info(html: Html) -> anyhow::Result<Vec<Course>> {
+    html.select(&Selector::parse(".catalog-page-detail-table-row").unwrap())
+        .skip(1)
+        .map(|el| {
+            Ok(Course {
+                name: select(&el, &NAME_SELECTOR)?,
+                teacher: select(&el, &TEACHER_SELECTOR)?,
+                semester: select_all(&el, &SEMESTER_SELECTOR)?.join(","),
+                period: select(&el, &PERIOD_SELECTOR)?,
+                code: select_all(&el, &CODE_SELECTOR)?.join(" "),
+            })
+        })
+        .collect()
 }
 
-fn select(html: &Html, selector: &Selector, nth: usize) -> anyhow::Result<String> {
-    html.select(selector)
-        .nth(nth)
+fn select(el: &ElementRef, selector: &Selector) -> anyhow::Result<String> {
+    el.select(selector)
+        .next()
         .ok_or(anyhow!(
             "Couldn't find matching element for selector {:?}",
             selector,
@@ -38,12 +43,12 @@ fn select(html: &Html, selector: &Selector, nth: usize) -> anyhow::Result<String> {
 }
 
 fn select_all<'a>(
-    html: &'a Html,
+    html: &'a ElementRef,
     selector: &'static Selector,
-    nth: usize,
+    // nth: usize,
 ) -> anyhow::Result<Vec<String>> {
     html.select(selector)
-        .nth(nth)
+        .next()
         .ok_or(anyhow!(
             "Couldn't find matching element for selector {:?}",
             selector,
diff --git a/scraper/src/urls.rs b/scraper/src/urls.rs
index aaa7f78c..565cbd50 100644
--- a/scraper/src/urls.rs
+++ b/scraper/src/urls.rs
@@ -1,4 +1,8 @@
-pub static URLS: [(&str, &str); 10] = [
+pub static URLS: [(&str, &str); 11] = [
+    (
+        "zenki",
+        "https://catalog.he.u-tokyo.ac.jp/result?q=&type=all&faculty_id=&facet=%7B%22faculty_type%22%3A%5B%22jd%22%5D%7D&page=",
+    ),
     (
         "law",
         "https://catalog.he.u-tokyo.ac.jp/result?type=ug&faculty_id=1&page=",
diff --git a/server/src/seeds/insertKoukiCourses.ts b/server/src/seeds/insertKoukiCourses.ts
new file mode 100644
index 00000000..75e01937
--- /dev/null
+++ b/server/src/seeds/insertKoukiCourses.ts
@@ -0,0 +1,106 @@
+import * as fs from "node:fs";
+import * as path from "node:path";
+
+import { prisma } from "../database/client";
+
+// Reads data in the kouki (後期, scraper output) format.
+const FILE_PATH = path.join(__dirname, "data.json");
+
+// sample
+// [
+//   {
+//     name: "zenki",
+//     courses: [
+//       {
+//         name: "数理科学基礎",
+//         teacher: "(人名)",
+//         semester: "S1,S2",
+//         period: "月曜2限、水曜1限",
+//         code: "30003 CAS-FC1871L1",
+//       },
+//     ],
+//   },
+// ];
+
+async function main() {
+  const jsonData: {
+    courses: {
+      name: string;
+      teacher: string;
+      semester: string;
+      period: string;
+      code: string;
+    }[];
+  }[] = JSON.parse(fs.readFileSync(FILE_PATH, "utf-8"));
+  console.log(jsonData);
+
+  const coursesData = jsonData[0].courses
+    .filter((course) => course.semester.split("")[0] === "S")
+    .map((course) => {
+      const { code, name, teacher } = course;
+      return {
+        id: code.split(" ")[0],
+        name: name,
+        teacher: teacher,
+      };
+    });
+
+  await prisma.course.createMany({
+    data: coursesData,
+  });
+
+  const slotsData: {
+    day: "mon" | "tue" | "wed" | "thu" | "fri" | "sat" | "sun" | "other";
+    period: number;
+    courseId: string;
+  }[] = [];
+
+  for (const courseData of jsonData[0].courses) {
+    const { code, period } = courseData;
+
+    if (courseData.semester.split("")[0] !== "S") continue;
+
+    for (const p of period.split("、")) {
+      const [dayJp, periodStr] = p.split("曜");
+      const day =
+        dayJp === "月"
"mon" + : dayJp === "火" + ? "tue" + : dayJp === "水" + ? "wed" + : dayJp === "木" + ? "thu" + : dayJp === "金" + ? "fri" + : dayJp === "土" + ? "sat" + : dayJp === "日" + ? "sun" + : "other"; + + slotsData.push({ + day, + period: Number.parseInt(periodStr?.split("")[0]) || 0, + courseId: code.split(" ")[0], + }); + } + } + + await prisma.slot.createMany({ + data: slotsData, + skipDuplicates: true, + }); + + console.log("Data inserted successfully!"); +} + +main() + .then(async () => { + await prisma.$disconnect(); + }) + .catch(async (e) => { + console.error(e); + await prisma.$disconnect(); + process.exit(1); + });