5 changes: 4 additions & 1 deletion .cspell.json
@@ -21,12 +21,14 @@
     "pkgs",
     "psql",
     "qiita",
+    "replacen",
     "reqwest",
     "rustc",
     "safify",
     "stdenv",
     "supabase",
-    "swiper"
+    "swiper",
+    "zenki"
   ],
   "dictionaries": [
     "softwareTerms",
@@ -52,6 +54,7 @@
     "**/*.svg",
     "**/migration.sql",
     "**/data.json",
+    "**/server/src/seeds/json/**",
     "**/Cargo.*",
     "scraper/target",
     "**/rust-toolchain.toml",
3 changes: 3 additions & 0 deletions .gitignore
@@ -3,6 +3,9 @@
 /.direnv
 /.husky
 
+.cache
+data.json
+
 # Logs
 logs
 *.log
1 change: 1 addition & 0 deletions biome.json
@@ -22,6 +22,7 @@
     "bun.lockb",
     "server/target",
     "data.json",
+    "server/src/seeds/json",
     "scraper/target",
     ".next",
     "next-env.d.ts",
18 changes: 9 additions & 9 deletions flake.lock

Generated file; diff not rendered.

5 changes: 2 additions & 3 deletions flake.nix
@@ -32,7 +32,7 @@
     };
     unstable = nixpkgs-unstable.legacyPackages.${system};
 
-    rust-bin = pkgs.rust-bin.fromRustupToolchainFile ./scraper/rust-toolchain.toml;
+    rust-bin = pkgs.rust-bin.beta.latest.default; # pkgs.rust-bin.fromRustupToolchainFile ./scraper/rust-toolchain.toml;
     prisma = pkgs.callPackage ./server/prisma.nix {inherit prisma-utils;};
 
     common = {
@@ -62,8 +62,7 @@
     };
   in {
     packages.scraper = pkgs.callPackage ./scraper {toolchain = rust-bin;};
-    devShells.default = pkgs.mkShell common;
-    devShells.scraper = pkgs.mkShell {
+    devShells.default = pkgs.mkShell {
       inherit (common) env;
       packages =
         common.packages
14 changes: 14 additions & 0 deletions scraper/sample.ts
@@ -0,0 +1,14 @@
+[
+  {
+    name: "zenki",
+    courses: [
+      {
+        name: "数理科学基礎",
+        teacher: "(人名)",
+        semester: "S1",
+        period: "月曜2限、水曜1限",
+        code: "30003 CAS-FC1871L1",
+      },
+    ],
+  },
+];
14 changes: 10 additions & 4 deletions scraper/src/io.rs
@@ -1,6 +1,5 @@
 use crate::types::*;
 use anyhow::ensure;
-use sha2::{Digest, Sha256};
 use tokio::fs;
 use tokio::io::AsyncWriteExt;
 
@@ -10,13 +9,20 @@ pub async fn write_to(file: &mut fs::File, content: Entry) -> anyhow::Result<()>
     Ok(())
 }
 
-use crate::CACHE_DIR;
+use crate::cache_dir;
 
 pub async fn request(url: &str) -> anyhow::Result<String> {
     println!("[request] sending request to {}", url);
 
-    let hash = Sha256::digest(url.as_bytes());
-    let path = format!("{CACHE_DIR}/{:x}", hash);
+    let cache_key = url
+        .to_string()
+        .replacen("/", "_", 1000)
+        .replacen(":", "_", 1000)
+        .replacen("?", "_", 1000)
+        .replacen("&", "_", 1000)
+        .replacen("=", "_", 1000)
+        .to_string();
+    let path = format!("{}/{cache_key}", cache_dir());
    if let Ok(bytes) = fs::read(&path).await {
        if let Ok(text) = String::from_utf8(bytes) {
            return Ok(text);
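Note: the cache key switches from a SHA-256 digest to a transliteration of the URL, so cache file names are now human-readable; replacen with a count of 1000 acts as a de facto replace-all. A minimal standalone sketch of the resulting keys, using std's replace as a stand-in for the chained replacen calls (the cache_key helper is hypothetical, not the PR's code):

fn cache_key(url: &str) -> String {
    // Rewrite every path-ish character to '_' so the key doubles as a flat,
    // filesystem-safe file name under the cache directory.
    url.replace(['/', ':', '?', '&', '='], "_")
}

fn main() {
    let key = cache_key("https://catalog.he.u-tokyo.ac.jp/result?type=ug&page=1");
    assert_eq!(key, "https___catalog.he.u-tokyo.ac.jp_result_type_ug_page_1");
}

One trade-off of this scheme: unlike the old fixed-length hash, very long URLs now map to very long file names.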
10 changes: 7 additions & 3 deletions scraper/src/main.rs
@@ -16,13 +16,16 @@ use scraper::{Html, Selector};
 use urls::URLS;
 
 const RESULT_FILE: &str = "./data.json";
-const CACHE_DIR: &str = "./.cache";
+
+fn cache_dir() -> String {
+    "./.cache".to_string()
+}
 
 #[tokio::main(flavor = "multi_thread")]
 async fn main() {
     println!("[log] starting...");
 
-    let _ = fs::DirBuilder::new().create(CACHE_DIR).await;
+    let _ = fs::DirBuilder::new().create(cache_dir()).await;
 
     let mut file = fs::File::create(RESULT_FILE)
         .await
@@ -59,7 +62,8 @@ async fn get_courses_of(base_url: &str) -> Vec<Course> {
     futures::future::join_all(courses)
         .await
         .into_iter()
-        .collect::<Vec<_>>()
+        .flatten()
+        .collect()
 }
 
 lazy_static! {
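Since parse_course_info now yields a Vec<Course> per page, join_all produces a vector of vectors, and the added .flatten() collapses it into one course list. A toy sketch of the shape change, with hypothetical integer values standing in for the scraper's real types:

fn main() {
    // One inner Vec per scraped page...
    let per_page: Vec<Vec<u32>> = vec![vec![1, 2], vec![3], vec![]];
    // ...flattened into a single list, as in the join_all chain above.
    let all: Vec<u32> = per_page.into_iter().flatten().collect();
    assert_eq!(all, vec![1, 2, 3]);
}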
35 changes: 20 additions & 15 deletions scraper/src/parser.rs
@@ -1,6 +1,6 @@
 use anyhow::anyhow;
 use lazy_static::lazy_static;
-use scraper::{Html, Selector};
+use scraper::{ElementRef, Html, Selector};
 
 use crate::types::*;
 
@@ -17,19 +17,24 @@ lazy_static! {
         Selector::parse(".catalog-page-detail-table-cell.code-cell").unwrap();
 }
 
-pub fn parse_course_info(html: Html) -> anyhow::Result<Course> {
-    Ok(Course {
-        name: select(&html, &NAME_SELECTOR, 1)?,
-        teacher: select(&html, &TEACHER_SELECTOR, 1)?,
-        semester: select_all(&html, &SEMESTER_SELECTOR, 1)?.join(","),
-        period: select(&html, &PERIOD_SELECTOR, 1)?,
-        code: select_all(&html, &CODE_SELECTOR, 1)?.join(" "),
-    })
+pub fn parse_course_info(html: Html) -> anyhow::Result<Vec<Course>> {
+    html.select(&Selector::parse(".catalog-page-detail-table-row").unwrap())
+        .skip(1)
+        .map(|el| {
+            Ok(Course {
+                name: select(&el, &NAME_SELECTOR)?,
+                teacher: select(&el, &TEACHER_SELECTOR)?,
+                semester: select_all(&el, &SEMESTER_SELECTOR)?.join(","),
+                period: select(&el, &PERIOD_SELECTOR)?,
+                code: select_all(&el, &CODE_SELECTOR)?.join(" "),
+            })
+        })
+        .collect()
 }
 
-fn select(html: &Html, selector: &Selector, nth: usize) -> anyhow::Result<String> {
-    html.select(selector)
-        .nth(nth)
+fn select(el: &ElementRef, selector: &Selector) -> anyhow::Result<String> {
+    el.select(selector)
+        .next()
         .ok_or(anyhow!(
             "Couldn't find matching element for selector {:?}",
             selector,
@@ -38,12 +43,12 @@ fn select_all<'a>(
 }
 
 fn select_all<'a>(
-    html: &'a Html,
+    html: &'a ElementRef,
     selector: &'static Selector,
-    nth: usize,
+    // nth: usize,
 ) -> anyhow::Result<Vec<&'a str>> {
     html.select(selector)
-        .nth(nth)
+        .next()
         .ok_or(anyhow!(
             "Couldn't find matching element for selector {:?}",
             selector,
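The parser now iterates catalog table rows and scopes each cell selector to the row's ElementRef, instead of picking the nth match out of the whole document; skip(1) drops the header row. A self-contained sketch of the pattern with made-up markup and class names (the real selectors live in the lazy_static block above):

use scraper::{Html, Selector};

fn main() {
    // Hypothetical markup standing in for the catalog's detail table.
    let html = Html::parse_document(
        r#"<table>
            <tr class="row"><td class="name">Header</td></tr>
            <tr class="row"><td class="name">Linear Algebra</td></tr>
            <tr class="row"><td class="name">Analysis</td></tr>
        </table>"#,
    );
    let row_sel = Selector::parse(".row").unwrap();
    let name_sel = Selector::parse(".name").unwrap();

    // skip(1) drops the header row; each inner select() only sees one row.
    let names: Vec<String> = html
        .select(&row_sel)
        .skip(1)
        .filter_map(|row| row.select(&name_sel).next())
        .map(|cell| cell.text().collect())
        .collect();

    assert_eq!(names, vec!["Linear Algebra", "Analysis"]);
}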
6 changes: 5 additions & 1 deletion scraper/src/urls.rs
@@ -1,4 +1,8 @@
-pub static URLS: [(&str, &str); 10] = [
+pub static URLS: [(&str, &str); 11] = [
+    (
+        "zenki",
+        "https://catalog.he.u-tokyo.ac.jp/result?q=&type=all&faculty_id=&facet=%7B%22faculty_type%22%3A%5B%22jd%22%5D%7D&page=",
+    ),
     (
         "law",
         "https://catalog.he.u-tokyo.ac.jp/result?type=ug&faculty_id=1&page=",
106 changes: 106 additions & 0 deletions server/src/seeds/insertKoukiCourses.ts
@@ -0,0 +1,106 @@
+import * as fs from "node:fs";
+import * as path from "node:path";
+
+import { prisma } from "../database/client";
+
+// Reads data in the kouki (後期, scraper) format.
+const FILE_PATH = path.join(__dirname, "data.json");
+
+// sample
+// [
+//   {
+//     name: "zenki",
+//     courses: [
+//       {
+//         name: "数理科学基礎",
+//         teacher: "(人名)",
+//         semester: "S1,S2",
+//         period: "月曜2限、水曜1限",
+//         code: "30003 CAS-FC1871L1",
+//       },
+//     ],
+//   },
+// ];
+
+async function main() {
+  const jsonData: {
+    courses: {
+      name: string;
+      teacher: string;
+      semester: string;
+      period: string;
+      code: string;
+    }[];
+  }[] = JSON.parse(fs.readFileSync(FILE_PATH, "utf-8"));
+  console.log(jsonData);
+
+  const coursesData = jsonData[0].courses
+    .filter((course) => course.semester.split("")[0] === "S")
+    .map((course) => {
+      const { code, name, teacher } = course;
+      return {
+        id: code.split(" ")[0],
+        name: name,
+        teacher: teacher,
+      };
+    });
+
+  await prisma.course.createMany({
+    data: coursesData,
+  });
+
+  const slotsData: {
+    day: "mon" | "tue" | "wed" | "thu" | "fri" | "sat" | "sun" | "other";
+    period: number;
+    courseId: string;
+  }[] = [];
+
+  for (const courseData of jsonData[0].courses) {
+    const { code, period } = courseData;
+
+    if (courseData.semester.split("")[0] !== "S") continue;
+
+    for (const p of period.split("、")) {
+      const [dayJp, periodStr] = p.split("曜");
+      const day =
+        dayJp === "月"
+          ? "mon"
+          : dayJp === "火"
+            ? "tue"
+            : dayJp === "水"
+              ? "wed"
+              : dayJp === "木"
+                ? "thu"
+                : dayJp === "金"
+                  ? "fri"
+                  : dayJp === "土"
+                    ? "sat"
+                    : dayJp === "日"
+                      ? "sun"
+                      : "other";
+
+      slotsData.push({
+        day,
+        period: Number.parseInt(periodStr?.split("")[0]) || 0,
+        courseId: code.split(" ")[0],
+      });
+    }
+  }
+
+  await prisma.slot.createMany({
+    data: slotsData,
+    skipDuplicates: true,
+  });
+
+  console.log("Data inserted successfully!");
+}
+
+main()
+  .then(async () => {
+    await prisma.$disconnect();
+  })
+  .catch(async (e) => {
+    console.error(e);
+    await prisma.$disconnect();
+    process.exit(1);
+  });
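The period strings pack day and slot into Japanese text: "月曜2限、水曜1限" reads "Monday period 2, Wednesday period 1", and the seeder recovers (day, period) pairs by splitting on 「、」 and 「曜」. A standalone sketch of the same parsing, written in Rust to match the scraper side (illustrative only; the parse_slots helper is hypothetical):

fn parse_slots(period: &str) -> Vec<(&'static str, u32)> {
    period
        .split('、') // entries look like "月曜2限"
        .filter_map(|p| {
            let (day_jp, rest) = p.split_once('曜')?;
            let day = match day_jp {
                "月" => "mon",
                "火" => "tue",
                "水" => "wed",
                "木" => "thu",
                "金" => "fri",
                "土" => "sat",
                "日" => "sun",
                _ => "other",
            };
            // The first character after 曜 is the period digit ("2限" -> 2).
            let period_no = rest.chars().next()?.to_digit(10).unwrap_or(0);
            Some((day, period_no))
        })
        .collect()
}

fn main() {
    assert_eq!(parse_slots("月曜2限、水曜1限"), vec![("mon", 2), ("wed", 1)]);
}

Unlike the TypeScript above, which stores period 0 when parsing fails, this sketch simply skips malformed entries.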