Merge branch 'develop'

juny0955 · juny0955 · commit 8ee741ad4cc8 · 2025-04-22T22:11:50.000+09:00
diff --git a/Dockerfile b/Dockerfile
@@ -1,9 +1,34 @@
 FROM openjdk:17-jdk
 
-ARG JAR_FILE_PATH=/build/libs/*.jar
+RUN apt-get update && apt-get install -y --no-install-recommends \
+        wget gnupg unzip \
+        libnss3 libatk-bridge2.0-0 libgbm1 libgtk-3-0 \
+        libx11-xcb1 libxrandr2 libxdamage1 libxcomposite1 libxss1 libasound2 \
+    && rm -rf /var/lib/apt/lists/*
 
-COPY $JAR_FILE_PATH app.jar
+# Chrome 설치
+RUN wget -qO- https://dl.google.com/linux/linux_signing_key.pub \
+        | gpg --dearmor -o /usr/share/keyrings/google.gpg \
+    && echo "deb [signed-by=/usr/share/keyrings/google.gpg] \
+        https://dl.google.com/linux/chrome/deb/ stable main" \
+        > /etc/apt/sources.list.d/google.list \
+    && apt-get update \
+    && apt-get install -y --no-install-recommends \
+        google-chrome-stable=135.0.7049.84-1 \
+    && rm -rf /var/lib/apt/lists/*
 
-EXPOSE 8080
+# Install ChromeDriver (115+ ships via Chrome for Testing; the zip nests the binary under chromedriver-linux64/, hence -j)
+RUN wget -qO /tmp/chromedriver.zip \
+        https://storage.googleapis.com/chrome-for-testing-public/135.0.7049.84/linux64/chromedriver-linux64.zip \
+    && unzip -j /tmp/chromedriver.zip chromedriver-linux64/chromedriver -d /usr/local/bin \
+    && chmod +x /usr/local/bin/chromedriver \
+    && rm /tmp/chromedriver.zip
+
+# 환경 변수 설정
+ENV CHROME_OPTIONS="--headless --no-sandbox --disable-dev-shm-usage"
 
+# 애플리케이션 설정
+ARG JAR_FILE_PATH=/build/libs/*.jar
+COPY $JAR_FILE_PATH app.jar
+EXPOSE 8080
 ENTRYPOINT ["java", "-jar", "app.jar"]
diff --git a/build.gradle b/build.gradle
@@ -25,7 +25,6 @@ repositories {
 
 dependencies {
     implementation 'org.springframework.boot:spring-boot-starter-data-jpa'
-    implementation 'org.springframework.boot:spring-boot-starter-web'
     implementation 'org.springframework.boot:spring-boot-starter-mail'
     implementation 'org.springframework.boot:spring-boot-starter-thymeleaf'
     implementation 'org.springframework.boot:spring-boot-starter-validation'
@@ -46,6 +45,12 @@ dependencies {
     runtimeOnly 'com.mysql:mysql-connector-j'
     runtimeOnly 'com.h2database:h2'
     testRuntimeOnly 'org.junit.platform:junit-platform-launcher'
+
+    implementation('org.springframework.boot:spring-boot-starter-web') {
+        exclude group: 'org.springframework.boot', module: 'spring-boot-starter-tomcat'
+    }
+    implementation 'org.springframework.boot:spring-boot-starter-undertow'
+
 }
 
 tasks.named('test') {
diff --git a/src/main/java/com/davcatch/devcatch/common/integration/crawling/WebCrawler.java b/src/main/java/com/davcatch/devcatch/common/integration/crawling/WebCrawler.java
@@ -27,6 +27,7 @@ public class WebCrawler {
 	public Optional<Document> getDocument(String link) {
 		log.debug("크롤링 시작 : {}", link);
 
+		Document document = null;
 		try {
 			ResponseEntity<String> response = crawlingRestTemplate.getForEntity(link, String.class);
 			String html = response.getBody();
@@ -36,14 +37,12 @@ public Optional<Document> getDocument(String link) {
 				return Optional.empty();
 			}
 
-			Document document = Jsoup.parse(html);
+			document = Jsoup.parse(html);
 			log.debug("크롤링 정상 수집 : {}", link);
-
-			return Optional.of(document);
-
 		} catch (Exception e) {
 			log.error("({}) 크롤링 중 에러 발생 : {}", link, e.getMessage());
-			return Optional.empty();
 		}
+
+		return Optional.ofNullable(document);
 	}
 }
diff --git a/src/main/java/com/davcatch/devcatch/common/integration/rss/RssReaderService.java b/src/main/java/com/davcatch/devcatch/common/integration/rss/RssReaderService.java
@@ -30,6 +30,7 @@ public class RssReaderService {
 	public Optional<SyndFeed> reader(Source source) {
 		log.debug("[{}] RSS FEED 수집 시작", source.getName());
 
+		SyndFeed feed = null;
 		try {
 			ResponseEntity<String> response = rssRestTemplate.getForEntity(source.getFeedUrl(), String.class);
 
@@ -45,13 +46,12 @@ public Optional<SyndFeed> reader(Source source) {
 			syndFeedInput.setPreserveWireFeed(true);
 			syndFeedInput.setXmlHealerOn(true); // XML 문법 오류 자동 복구 활성화
 
-			SyndFeed feed = syndFeedInput.build(new StringReader(rssFeedXml));
+			feed = syndFeedInput.build(new StringReader(rssFeedXml));
 			log.debug("RSS FEED 정상 수집 : {}", source.getName());
-			return Optional.of(feed);
-
 		} catch (FeedException e) {
 			log.error("[{}] RSS FEED 수집중 오류 발생 : {}", source.getName(), e.getMessage());
-			return Optional.empty();
 		}
+
+		return Optional.ofNullable(feed);
 	}
 }
diff --git a/src/main/java/com/davcatch/devcatch/common/integration/selenium/SeleniumBrowserService.java b/src/main/java/com/davcatch/devcatch/common/integration/selenium/SeleniumBrowserService.java
@@ -65,6 +65,7 @@ public Optional<SyndFeed> reader(Source source) {
 		log.debug("[{}] 셀레니움 헤드리스 RSS FEED 수집 시작", source.getName());
 
 		WebDriver webDriver = null;
+		SyndFeed feed = null;
 		try {
 			webDriver = createWebDriver();
 
@@ -93,13 +94,14 @@ public Optional<SyndFeed> reader(Source source) {
 			syndFeedInput.setXmlHealerOn(true); // XML 문법 오류 자동 복구 활성화
 
 			String parse = Jsoup.parse(pageSource).body().text();
-			SyndFeed feed = syndFeedInput.build(new StringReader(parse));
-			return Optional.of(feed);
+
+			feed = syndFeedInput.build(new StringReader(parse));
 		} catch (Exception e) {
 			log.error("[{}] RSS FEED 수집중 오류 발생 : {}", source.getName(), e.getMessage());
-			return Optional.empty();
 		} finally {
 			destroyWebDriver(webDriver);
 		}
+
+		return Optional.ofNullable(feed);
 	}
 }
diff --git a/src/main/java/com/davcatch/devcatch/common/scheduler/article/parser/strategy/AbstractArticleStrategy.java b/src/main/java/com/davcatch/devcatch/common/scheduler/article/parser/strategy/AbstractArticleStrategy.java
@@ -1,5 +1,6 @@
 package com.davcatch.devcatch.common.scheduler.article.parser.strategy;
 
+import java.util.ArrayList;
 import java.util.Collections;
 import java.util.List;
 
@@ -25,7 +26,69 @@ public abstract class AbstractArticleStrategy implements ArticleParseStrategy {
 	private final SeleniumBrowserService seleniumBrowserService;
 	private final ContentExtractorFactory contentExtractorFactory;
 
-	protected List<SyndEntry> getEntries(Source source) {
+	/**
+	 * Template method: parses the entries of {@code source} into articles.
+	 *
+	 * <p>Resolves the content extractor for the source's parse method, obtains the
+	 * entries from {@link #fetchEntries(Source)} and converts each one with
+	 * {@link #processEntry(SyndEntry, Source, ContentExtractorStrategy)}. An entry
+	 * whose conversion throws or returns {@code null} is skipped, so one bad entry
+	 * does not discard the rest of the batch.
+	 *
+	 * @param source source whose entries are parsed
+	 * @return the successfully parsed articles; empty when no entries were fetched
+	 * @throws CustomException presumably when the content extractor cannot be
+	 *         resolved; per-entry failures are caught and logged instead
+	 */
+	@Override
+	public List<ParsedArticle> process(Source source) throws CustomException {
+		ContentExtractorStrategy contentExtractor = getContentExtractor(source.getParseMethod());
+
+		List<SyndEntry> entries = fetchEntries(source);
+
+		if (entries.isEmpty()) {
+			return Collections.emptyList();
+		}
+
+		List<ParsedArticle> parsedArticles = new ArrayList<>();
+		for (SyndEntry entry : entries) {
+			try {
+				ParsedArticle article = processEntry(entry, source, contentExtractor);
+				if (article != null) {
+					parsedArticles.add(article);
+				}
+			} catch (Exception e) {
+				// Best effort: log with the stack trace (trailing throwable) and continue with the next entry.
+				log.error("[{}] 엔트리 처리 중 오류 발생: {}", source.getName(), e.getMessage(), e);
+			}
+		}
+
+		log.debug("[{}] 총 {}개 아티클 파싱 완료", source.getName(), parsedArticles.size());
+		return parsedArticles;
+	}
+
+	/**
+	 * Supplies the feed entries that {@link #process(Source)} converts.
+	 *
+	 * @param source source to read entries for
+	 * @return the entries to process; must not be {@code null} because
+	 *         {@link #process(Source)} calls {@code isEmpty()} on the result
+	 */
+	protected abstract List<SyndEntry> fetchEntries(Source source);
+
+	/**
+	 * Converts a single feed entry into a parsed article.
+	 *
+	 * @param entry feed entry to convert
+	 * @param source source the entry belongs to
+	 * @param contentExtractor extractor resolved for the source's parse method
+	 * @return the parsed article, or {@code null} to have the entry skipped
+	 * @throws CustomException if the entry cannot be converted;
+	 *         {@link #process(Source)} logs it and moves on
+	 */
+	protected abstract ParsedArticle processEntry(SyndEntry entry, Source source, ContentExtractorStrategy contentExtractor) throws CustomException;
+
+	protected List<SyndEntry> getEntriesFromRss(Source source) {
 		return rssReaderService.reader(source)
 			.map(SyndFeed::getEntries)
 			.orElseGet(() -> {
diff --git a/src/main/java/com/davcatch/devcatch/common/scheduler/article/parser/strategy/CloudFlareParseStrategy.java b/src/main/java/com/davcatch/devcatch/common/scheduler/article/parser/strategy/CloudFlareParseStrategy.java
@@ -1,6 +1,5 @@
 package com.davcatch.devcatch.common.scheduler.article.parser.strategy;
 
-import java.util.ArrayList;
 import java.util.List;
 import java.util.Set;
 
@@ -27,17 +26,14 @@ public CloudFlareParseStrategy(RssReaderService rssReaderService, SeleniumBrowse
 	}
 
 	@Override
-	public List<ParsedArticle> process(Source source) throws CustomException {
-		ContentExtractorStrategy extractor = getContentExtractor(source.getParseMethod());
-		List<SyndEntry> entries = getEntriesFromHeadless(source);
-
-		List<ParsedArticle> parsedArticles = new ArrayList<>();
-		for (SyndEntry entry : entries) {
-			String content = extractor.extractContent(entry, null);
-			parsedArticles.add(ParsedArticle.of(entry, source, content));
-		}
+	protected List<SyndEntry> fetchEntries(Source source) {
+		return getEntriesFromHeadless(source);
+	}
 
-		return parsedArticles;
+	@Override
+	protected ParsedArticle processEntry(SyndEntry entry, Source source, ContentExtractorStrategy contentExtractor) {
+		String content = contentExtractor.extractContent(entry, null);
+		return ParsedArticle.of(entry, source, content);
 	}
 
 	@Override
diff --git a/src/main/java/com/davcatch/devcatch/common/scheduler/article/parser/strategy/CrawlingParseStrategy.java b/src/main/java/com/davcatch/devcatch/common/scheduler/article/parser/strategy/CrawlingParseStrategy.java
@@ -1,21 +1,21 @@
 package com.davcatch.devcatch.common.scheduler.article.parser.strategy;
 
-import java.util.ArrayList;
 import java.util.List;
 import java.util.Set;
 
 import org.jsoup.nodes.Document;
 import org.springframework.stereotype.Component;
 
 import com.davcatch.devcatch.common.exception.CustomException;
-import com.davcatch.devcatch.common.integration.selenium.SeleniumBrowserService;
-import com.davcatch.devcatch.domain.source.ParseMethod;
-import com.davcatch.devcatch.domain.source.Source;
+import com.davcatch.devcatch.common.exception.ErrorCode;
 import com.davcatch.devcatch.common.integration.crawling.WebCrawler;
 import com.davcatch.devcatch.common.integration.rss.RssReaderService;
+import com.davcatch.devcatch.common.integration.selenium.SeleniumBrowserService;
 import com.davcatch.devcatch.common.scheduler.article.dto.ParsedArticle;
 import com.davcatch.devcatch.common.scheduler.article.extractor.factory.ContentExtractorFactory;
 import com.davcatch.devcatch.common.scheduler.article.extractor.strategy.ContentExtractorStrategy;
+import com.davcatch.devcatch.domain.source.ParseMethod;
+import com.davcatch.devcatch.domain.source.Source;
 import com.rometools.rome.feed.synd.SyndEntry;
 
 import lombok.extern.slf4j.Slf4j;
@@ -33,24 +33,19 @@ public CrawlingParseStrategy(RssReaderService rssReaderService, SeleniumBrowserS
 	}
 
 	@Override
-	public List<ParsedArticle> process(Source source) throws CustomException {
-		ContentExtractorStrategy extractor = getContentExtractor(source.getParseMethod());
-		List<SyndEntry> entries = getEntries(source);
-
-		List<ParsedArticle> parsedArticles = new ArrayList<>();
-		for (int i = 0; i < Math.min(MAX_PARSE_PAGE, entries.size()) ; i++) {
-			SyndEntry entry = entries.get(i);
-
-			String link = source.isUseLink() ? entry.getLink() : entry.getUri();
-			Document document = webCrawler.getDocument(link).orElse(null);
-			if (document == null)
-				continue;
+	protected List<SyndEntry> fetchEntries(Source source) {
+		List<SyndEntry> entries = getEntriesFromRss(source);
+		return entries.subList(0, Math.min(MAX_PARSE_PAGE, entries.size()));
+	}
 
-			String content = extractor.extractContent(null, document);
-			parsedArticles.add(ParsedArticle.of(entry, source, content));
-		}
+	@Override
+	protected ParsedArticle processEntry(SyndEntry entry, Source source, ContentExtractorStrategy contentExtractor) throws CustomException {
+		String link = source.isUseLink() ? entry.getLink() : entry.getUri();
+		Document document = webCrawler.getDocument(link)
+			.orElseThrow(() -> new CustomException(ErrorCode.CONTENT_PARSE_ERROR));
 
-		return parsedArticles;
+		String content = contentExtractor.extractContent(null, document);
+		return ParsedArticle.of(entry, source, content);
 	}
 
 	@Override
diff --git a/src/main/java/com/davcatch/devcatch/common/scheduler/article/parser/strategy/RssParseStrategy.java b/src/main/java/com/davcatch/devcatch/common/scheduler/article/parser/strategy/RssParseStrategy.java
@@ -1,19 +1,17 @@
 package com.davcatch.devcatch.common.scheduler.article.parser.strategy;
 
-import java.util.ArrayList;
 import java.util.List;
 import java.util.Set;
 
 import org.springframework.stereotype.Component;
 
+import com.davcatch.devcatch.common.integration.rss.RssReaderService;
 import com.davcatch.devcatch.common.integration.selenium.SeleniumBrowserService;
 import com.davcatch.devcatch.common.scheduler.article.dto.ParsedArticle;
+import com.davcatch.devcatch.common.scheduler.article.extractor.factory.ContentExtractorFactory;
 import com.davcatch.devcatch.common.scheduler.article.extractor.strategy.ContentExtractorStrategy;
 import com.davcatch.devcatch.domain.source.ParseMethod;
 import com.davcatch.devcatch.domain.source.Source;
-import com.davcatch.devcatch.common.exception.CustomException;
-import com.davcatch.devcatch.common.integration.rss.RssReaderService;
-import com.davcatch.devcatch.common.scheduler.article.extractor.factory.ContentExtractorFactory;
 import com.rometools.rome.feed.synd.SyndEntry;
 
 import lombok.extern.slf4j.Slf4j;
@@ -27,17 +25,14 @@ public RssParseStrategy(RssReaderService rssReaderService, SeleniumBrowserServic
 	}
 
 	@Override
-	public List<ParsedArticle> process(Source source) throws CustomException {
-		ContentExtractorStrategy extractor = getContentExtractor(source.getParseMethod());
-		List<SyndEntry> entries = getEntries(source);
-
-		List<ParsedArticle> parsedArticles = new ArrayList<>();
-		for (SyndEntry entry : entries) {
-			String content = extractor.extractContent(entry, null);
-			parsedArticles.add(ParsedArticle.of(entry, source, content));
-		}
+	protected List<SyndEntry> fetchEntries(Source source) {
+		return getEntriesFromRss(source);
+	}
 
-		return parsedArticles;
+	@Override
+	protected ParsedArticle processEntry(SyndEntry entry, Source source, ContentExtractorStrategy contentExtractor) {
+		String content = contentExtractor.extractContent(entry, null);
+		return ParsedArticle.of(entry, source, content);
 	}
 
 	@Override
diff --git a/src/main/java/com/davcatch/devcatch/repository/tag/TagRepository.java b/src/main/java/com/davcatch/devcatch/repository/tag/TagRepository.java
@@ -11,8 +11,6 @@
 
 public interface TagRepository extends JpaRepository<Tag, Long> {
 
-	Optional<Tag> findByTagType(TagType tagType);
-
 	@Query("select t from Tag t where t.tagType in :tagTypes")
 	List<Tag> findInTagType(List<TagType> tagTypes);
 }
diff --git a/src/main/resources/templates/layout/web_layout.html b/src/main/resources/templates/layout/web_layout.html
@@ -15,6 +15,8 @@
     <script th:src="@{/js/tailwind-config.js}"></script>
     <!-- 추가 스타일 및 리소스 -->
     <th:block th:replace="${additionalResources}" />
+    <script async src="https://pagead2.googlesyndication.com/pagead/js/adsbygoogle.js?client=ca-pub-7068040916236616"
+            crossorigin="anonymous"></script>
 </head>
 <body class="bg-gray-50 font-sans min-h-screen flex flex-col">
     <!-- 네비게이션 바 -->

Original file line number	Diff line number	Diff line change
`@@ -27,6 +27,7 @@ public class WebCrawler {`
`27`	`27`	`public Optional<Document> getDocument(String link) {`
`28`	`28`	`log.debug("크롤링 시작 : {}", link);`
`29`	`29`
	`30`	`+ Document document = null;`
`30`	`31`	`try {`
`31`	`32`	`ResponseEntity<String> response = crawlingRestTemplate.getForEntity(link, String.class);`
`32`	`33`	`String html = response.getBody();`
`@@ -36,14 +37,12 @@ public Optional<Document> getDocument(String link) {`
`36`	`37`	`return Optional.empty();`
`37`	`38`	`}`
`38`	`39`
`39`		`- Document document = Jsoup.parse(html);`
	`40`	`+ document = Jsoup.parse(html);`
`40`	`41`	`log.debug("크롤링 정상 수집 : {}", link);`
`41`		`-`
`42`		`- return Optional.of(document);`
`43`		`-`
`44`	`42`	`} catch (Exception e) {`
`45`	`43`	`log.error("({}) 크롤링 중 에러 발생 : {}", link, e.getMessage());`
`46`		`- return Optional.empty();`
`47`	`44`	`}`
	`45`	`+`
	`46`	`+ return Optional.ofNullable(document);`
`48`	`47`	`}`
`49`	`48`	`}`
Original file line number	Diff line number	Diff line change
`@@ -11,8 +11,6 @@`
`11`	`11`
`12`	`12`	`public interface TagRepository extends JpaRepository<Tag, Long> {`
`13`	`13`
`14`		`- Optional<Tag> findByTagType(TagType tagType);`
`15`		`-`
`16`	`14`	`@Query("select t from Tag t where t.tagType in :tagTypes")`
`17`	`15`	`List<Tag> findInTagType(List<TagType> tagTypes);`
`18`	`16`	`}`