diff --git a/backend/package.json b/backend/package.json
index c1ea6b6..74d256c 100644
--- a/backend/package.json
+++ b/backend/package.json
@@ -21,7 +21,10 @@
     "test:cov": "jest --coverage",
     "test:debug": "node --inspect-brk -r tsconfig-paths/register -r ts-node/register node_modules/.bin/jest --runInBand",
     "test:e2e": "jest --config ./test/jest-e2e.json",
-    "test:bench": "jest --rootDir . test/bench/performance.spec.ts"
+    "test:bench": "jest --rootDir . test/bench/performance.spec.ts",
+    "test:bench:posts": "jest --config ./test/bench/jest-bench.json",
+    "bench:setup": "./test/bench/generate-posts.sh",
+    "bench:run": "jest --config ./test/bench/jest-bench.json"
   },
   "dependencies": {
     "@nestjs/common": "^10.0.0",
diff --git a/backend/src/instances/blog/blog.service.ts b/backend/src/instances/blog/blog.service.ts
index 2d38507..10a5bba 100644
--- a/backend/src/instances/blog/blog.service.ts
+++ b/backend/src/instances/blog/blog.service.ts
@@ -16,18 +16,35 @@ import {
 
 @Injectable()
 export class BlogService implements OnModuleInit {
-  private readonly POSTS_ROOT_PATH = join(process.cwd(), "../posts");
+  private readonly POSTS_ROOT_PATH =
+    process.env.BLOG_POSTS_PATH || join(process.cwd(), "../posts");
   private readonly INDEX_FILE_PATH = join(this.POSTS_ROOT_PATH, "posts.jsonl");
   public readonly logger = new Logger(BlogService.name);
 
   private totalPostCount = 0;
+  /** Parsed entries of posts.jsonl, loaded once by onModuleInit. */
+  private postsCache: PostFrontMatter[] = [];
   private fileProcessor = new FileProcessor();
 
   async onModuleInit() {
     this.logger.log("BlogService 초기화를 진행합니다...");
     try {
       await this.ensureIndex();
-      this.totalPostCount = await this.countPosts();
+
+      // Load the whole JSONL index once; readIndexLines() then serves
+      // entries from memory instead of re-reading the file.
+      const posts: PostFrontMatter[] = [];
+      const stream = createReadStream(this.INDEX_FILE_PATH);
+      const rl = createInterface({ input: stream, crlfDelay: Infinity });
+
+      for await (const line of rl) {
+        if (line.trim()) {
+          posts.push(JSON.parse(line));
+        }
+      }
+      this.postsCache = posts;
+      this.totalPostCount = this.postsCache.length;
+
       this.logger.log(
         `블로그 서비스가 성공적으로 초기화되었습니다. 총 게시물 수: ${this.totalPostCount}`,
       );
@@ -97,24 +114,11 @@ export class BlogService implements OnModuleInit {
   }
 
   private async *readIndexLines(): AsyncGenerator {
-    const stream = createReadStream(this.INDEX_FILE_PATH);
-    const rl = createInterface({ input: stream, crlfDelay: Infinity });
-
-    for await (const line of rl) {
-      if (line.trim()) {
-        yield JSON.parse(line);
-      }
-    }
-  }
-
-  private async countPosts(): Promise {
-    let count = 0;
-    const stream = createReadStream(this.INDEX_FILE_PATH);
-    const rl = createInterface({ input: stream, crlfDelay: Infinity });
-    for await (const _ of rl) {
-      count++;
+    // Yields from the in-memory cache filled by onModuleInit; the async
+    // generator signature is kept so the method's interface is unchanged.
+    for (const post of this.postsCache) {
+      yield post;
     }
-    return count;
   }
 
   private async ensureIndex() {
diff --git a/backend/src/main.ts b/backend/src/main.ts
index 69c076a..67be4f3 100644
--- a/backend/src/main.ts
+++ b/backend/src/main.ts
@@ -10,7 +10,7 @@ async function bootstrap() {
   const originString = configService.get("CORS_ORIGIN") || "";
   const allowedOrigins = originString.split(",").map((url) => url.trim());
 
-  app.set("turst proxy", 1);
+  app.set("trust proxy", 1);
 
   app.enableCors({
     origin: allowedOrigins,
diff --git a/backend/test/bench/generate-posts.sh b/backend/test/bench/generate-posts.sh
new file mode 100755
index 0000000..c0b0327
--- /dev/null
+++ b/backend/test/bench/generate-posts.sh
@@ -0,0 +1,115 @@
+#!/bin/bash
+
+# Total number of files to generate - taken from the first argument (default: 50000)
+NUM_FILES=${1:-50000}
+
+# Output directory, resolved relative to the directory that contains this script
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+TMP_DIR="$SCRIPT_DIR/tmp/posts"
+
+# Categories; each one gets its own sub-directory
+CATEGORIES=("web" "algorithm" "cs" "code")
+NUM_CATEGORIES=${#CATEGORIES[@]}
+
+# Files per category (even split; the remainder goes to the first category)
+FILES_PER_CATEGORY=$((NUM_FILES / NUM_CATEGORIES))
+REMAINING_FILES=$((NUM_FILES % NUM_CATEGORIES))
+
+echo "대상 디렉토리: $TMP_DIR"
+echo "카테고리별 파일 개수: $FILES_PER_CATEGORY개 (나머지 $REMAINING_FILES개는 첫 번째 카테고리에 추가)"
+
+# Create the output directory if it does not exist yet
+if [ ! -d "$TMP_DIR" ]; then
+  mkdir -p "$TMP_DIR"
+  echo "'$TMP_DIR' 디렉토리가 생성되었습니다."
+else
+  echo "'$TMP_DIR' 디렉토리가 이미 존재합니다. 기존 파일이 덮어쓰여질 수 있습니다."
+  # Optional: clear out existing files
+  # rm -rf "$TMP_DIR"/*
+fi
+
+# Create one directory per category
+for category in "${CATEGORIES[@]}"; do
+  CATEGORY_DIR="$TMP_DIR/$category"
+  if [ ! -d "$CATEGORY_DIR" ]; then
+    mkdir -p "$CATEGORY_DIR"
+    echo "'$CATEGORY_DIR' 디렉토리가 생성되었습니다."
+  fi
+done
+
+echo "$NUM_FILES개의 더미 .mdx 파일 생성을 시작합니다 (카테고리별 폴더 구조)..."
+
+START_TIME=$(date +%s) # start time (epoch seconds)
+
+# Lorem ipsum template for the post body (roughly 0.45 KB per chunk).
+# It is repeated 100 times below, giving a body of roughly 45 KB per file.
+LOREM_IPSUM_CHUNK="Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
+"
+# Build the body by repeating the chunk 100 times
+LONG_CONTENT=""
+for (( k=1; k<=100; k++ )); do # 100 repetitions of the chunk
+  LONG_CONTENT+="$LOREM_IPSUM_CHUNK"
+done
+
+# Running count of files across all categories
+total_file_count=0
+
+# Generate the files category by category
+for cat_index in "${!CATEGORIES[@]}"; do
+  category="${CATEGORIES[$cat_index]}"
+
+  # The first category also receives the remainder
+  if [ "$cat_index" -eq 0 ]; then
+    files_to_create=$((FILES_PER_CATEGORY + REMAINING_FILES))
+  else
+    files_to_create=$FILES_PER_CATEGORY
+  fi
+
+  echo "[$category] 카테고리에서 $files_to_create개 파일 생성 중..."
+
+  for (( i=1; i<=files_to_create; i++ )); do
+    total_file_count=$((total_file_count + 1))
+
+    FILE_NAME="${category}-post-$i.mdx"
+    CATEGORY_DIR="$TMP_DIR/$category"
+    FILE_PATH="$CATEGORY_DIR/$FILE_NAME"
+
+    # Front matter fields (intended to match FrontMatterSchema)
+    TITLE="$category Post Title $i"
+    DATE="2025-$(printf "%02d" $(( (total_file_count % 12) + 1 )))-$(printf "%02d" $(( (total_file_count % 28) + 1 )))"
+    SUMMARY="This is a summary for $category post $i. Lorem ipsum dolor sit amet, consectetur adipiscing elit."
+    SLUG="${category}-post-$i"
+    COMPLETED="true"
+
+    FRONTMATTER="---
+title: \"$TITLE\"
+date: \"$DATE\"
+tags: [\"tech\", \"coding\", \"$category\"]
+summary: \"$SUMMARY\"
+slug: \"$SLUG\"
+category: \"$category\"
+completed: $COMPLETED
+---"
+
+    # Write front matter + body; printf '%s' leaves backslashes untouched (unlike echo -e)
+    printf '%s\n\n%s\n' "$FRONTMATTER" "$LONG_CONTENT" > "$FILE_PATH"
+
+    # Report progress every 1000 files
+    if (( total_file_count % 1000 == 0 )); then
+      echo "$total_file_count개 파일 생성 완료..."
+    fi
+  done
+
+  echo "[$category] 카테고리 완료: $files_to_create개 파일 생성됨"
+done
+
+END_TIME=$(date +%s) # end time (epoch seconds)
+TOTAL_TIME=$((END_TIME - START_TIME))
+
+echo -e "\n모든 $NUM_FILES개 더미 .mdx 파일 생성이 완료되었습니다!"
+echo "카테고리별 분포:"
+for category in "${CATEGORIES[@]}"; do
+  file_count=$(find "$TMP_DIR/$category" -name "*.mdx" | wc -l)
+  echo " $category: $file_count개"
+done
+echo "총 소요 시간: ${TOTAL_TIME}초"
diff --git a/backend/test/bench/jest-bench.json b/backend/test/bench/jest-bench.json
new file mode 100644
index 0000000..07995e9
--- /dev/null
+++ b/backend/test/bench/jest-bench.json
@@ -0,0 +1,19 @@
+{
+  "moduleFileExtensions": ["js", "json", "ts"],
+  "rootDir": "../../",
+  "testRegex": "\\.bench\\.spec\\.ts$",
+  "transform": {
+    "^.+\\.(t|j)s$": "ts-jest"
+  },
+  "collectCoverageFrom": ["**/*.(t|j)s"],
+  "coverageDirectory": "../coverage",
+  "testEnvironment": "node",
+  "moduleNameMapper": {
+    "^src/(.*)$": "<rootDir>/src/$1"
+  },
+  "globals": {
+    "ts-jest": {
+      "tsconfig": "tsconfig.json"
+    }
+  }
+}
diff --git a/backend/test/bench/posts.bench.spec.ts b/backend/test/bench/posts.bench.spec.ts
new file mode 100644
index 0000000..624ecc1
--- /dev/null
+++ b/backend/test/bench/posts.bench.spec.ts
@@ -0,0 +1,173 @@
+import { join } from "node:path";
+import { readdir, readFile } from "node:fs/promises";
+import { Test, TestingModule } from "@nestjs/testing";
+import { BlogService } from "../../src/instances/blog/blog.service";
+import { PostsService } from "../../src/posts/posts.service";
+import * as matter from "gray-matter";
+
+// ==========================================
+// Native / Eager Implementation for Comparison
+// ==========================================
+
+/** One parsed post as held in memory by the eager baseline. */
+interface NativePost {
+  frontmatter: Record<string, unknown>;
+  filePath: string;
+}
+
+/**
+ * Eager baseline: reads every post file in full, parses its front matter with
+ * gray-matter and keeps all results in one in-memory array.
+ */
+class NativeBlogService {
+  public posts: NativePost[] = [];
+
+  constructor(private readonly rootPath: string) {}
+
+  /** Rebuilds `posts` from every .md/.mdx file found under `rootPath`. */
+  async buildIndex(): Promise<void> {
+    this.posts = [];
+    const files = await this.getFiles(this.rootPath);
+
+    // Files are processed in batches so that at most `chunkSize` reads are in
+    // flight at once; each file is still read in full, which keeps this
+    // "eager" compared with the streaming implementation.
+    const chunkSize = 5000;
+    for (let i = 0; i < files.length; i += chunkSize) {
+      const chunk = files.slice(i, i + chunkSize);
+      await Promise.all(
+        chunk.map(async (file) => {
+          // Deliberately naive: the whole file is loaded just to get the front matter.
+          const content = await readFile(file, "utf-8");
+          const parsed = matter(content);
+          this.posts.push({ frontmatter: parsed.data, filePath: file });
+        }),
+      );
+    }
+  }
+
+  /** Recursively collects the paths of all .md/.mdx files under `dir`. */
+  private async getFiles(dir: string): Promise<string[]> {
+    const dirents = await readdir(dir, { withFileTypes: true });
+    const nested = await Promise.all(
+      dirents.map((dirent) => {
+        const res = join(dir, dirent.name);
+        return dirent.isDirectory() ? this.getFiles(res) : res;
+      }),
+    );
+    return nested
+      .flat()
+      .filter((f) => f.endsWith(".md") || f.endsWith(".mdx"));
+  }
+
+  /** Returns every indexed post. */
+  findAll(): NativePost[] {
+    return this.posts;
+  }
+
+  /** Returns the `count` most recent posts by front-matter `date`, newest first. */
+  findLatest(count: number): NativePost[] {
+    const timeOf = (post: NativePost): number =>
+      new Date(post.frontmatter.date as string | number | Date).getTime();
+    // Sort a copy: Array.prototype.sort works in place and would otherwise
+    // reorder `this.posts` as a side effect of a read-only query.
+    return [...this.posts]
+      .sort((a, b) => timeOf(b) - timeOf(a))
+      .slice(0, count);
+  }
+}
+
+// ==========================================
+// Benchmark Suite
+// ==========================================
+describe("PostsService Benchmark: Lazy (User) vs Eager (Native)", () => {
+  let postsService: PostsService;
+  let blogService: BlogService;
+  let nativeService: NativeBlogService;
+
+  // Same location that test/bench/generate-posts.sh writes its fixtures to
+  // when jest is started from the backend directory.
+  const BENCH_POSTS_PATH = join(process.cwd(), "test/bench/tmp/posts");
+
+  beforeAll(async () => {
+    // Must be set before BlogService is constructed: the service reads
+    // BLOG_POSTS_PATH when its POSTS_ROOT_PATH field is initialised.
+    process.env.BLOG_POSTS_PATH = BENCH_POSTS_PATH;
+
+    const module: TestingModule = await Test.createTestingModule({
+      providers: [BlogService, PostsService],
+    }).compile();
+
+    blogService = module.get(BlogService);
+    postsService = module.get(PostsService);
+    nativeService = new NativeBlogService(BENCH_POSTS_PATH);
+
+    // Silence service logging so it does not drown out the benchmark output.
+    jest.spyOn(blogService.logger, "log").mockImplementation(() => {});
+    jest.spyOn(blogService.logger, "warn").mockImplementation(() => {});
+  });
+
+  afterAll(async () => {
+    // The generated fixtures are kept for manual inspection. To clean up,
+    // import `rm` from "node:fs/promises" and enable:
+    // await rm(BENCH_POSTS_PATH, { recursive: true, force: true });
+    delete process.env.BLOG_POSTS_PATH;
+  });
+
+  /** Current heap usage in MB; forces a GC first when node runs with --expose-gc. */
+  const getMemory = (): number => {
+    if (global.gc) global.gc();
+    return process.memoryUsage().heapUsed / 1024 / 1024;
+  };
+
+  it("Comparison: Initial Index Building (Cold Start)", async () => {
+    console.log("\n[Benchmark] Index Building (Cold Start)...");
+
+    // 1. Lazy/User logic (BlogService).
+    // `buildIndexFromFiles` is timed directly as the "indexing task"; loading
+    // the resulting index into memory is done by `onModuleInit`, which the
+    // query test below calls separately.
+    const startMemLazy = getMemory();
+    const startLazy = performance.now();
+    await blogService.buildIndexFromFiles();
+    const endLazy = performance.now();
+    const endMemLazy = getMemory();
+
+    console.log(`[User Logic] Index Build Time: ${(endLazy - startLazy).toFixed(2)}ms`);
+    console.log(`[User Logic] Memory Delta: ${(endMemLazy - startMemLazy).toFixed(2)} MB`);
+
+    // 2. Native/Eager logic. getMemory() already triggers a GC when available,
+    // so no separate global.gc() call is needed here.
+    const startMemNative = getMemory();
+    const startNative = performance.now();
+    await nativeService.buildIndex();
+    const endNative = performance.now();
+    const endMemNative = getMemory();
+
+    console.log(`[Native Logic] Index Build Time: ${(endNative - startNative).toFixed(2)}ms`);
+    console.log(`[Native Logic] Memory Delta: ${(endMemNative - startMemNative).toFixed(2)} MB`);
+  }, 600000); // 10 min timeout: indexing a large fixture set is slow
+
+  it("Comparison: Querying (Find Latest 100)", async () => {
+    // Reload BlogService from the index built by the previous test. The native
+    // service likewise relies on the index it built there.
+    await blogService.onModuleInit();
+
+    console.log("\n[Benchmark] Find Latest 100...");
+
+    // User Logic
+    const startLazy = performance.now();
+    await postsService.findLatest(100);
+    const endLazy = performance.now();
+
+    // Native Logic
+    const startNative = performance.now();
+    const nativeRes = nativeService.findLatest(100);
+    const endNative = performance.now();
+    // Sanity check, kept outside the timed region.
+    if (nativeRes.length === 0) throw new Error("Native empty");
+
+    console.log(`[User Logic] Query Time: ${(endLazy - startLazy).toFixed(2)}ms`);
+    console.log(`[Native Logic] Query Time: ${(endNative - startNative).toFixed(2)}ms`);
+  });
+});