-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Browse files
Browse the repository at this point in the history
Feat/#25 Backend Crawling (뉴스 정보 크롤링)
- Loading branch information
Showing
21 changed files
with
406 additions
and
46 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
105 changes: 105 additions & 0 deletions
105
backend/src/main/java/com/rollthedice/backend/domain/crawling/NewsCrawlingService.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,105 @@ | ||
package com.rollthedice.backend.domain.crawling; | ||
|
||
import com.rollthedice.backend.domain.dto.NewsUrlDto; | ||
import com.rollthedice.backend.domain.news.entity.News; | ||
import com.rollthedice.backend.domain.news.service.NewsCategory; | ||
import com.rollthedice.backend.domain.news.service.NewsService; | ||
import lombok.RequiredArgsConstructor; | ||
import lombok.extern.slf4j.Slf4j; | ||
import org.jsoup.Jsoup; | ||
import org.jsoup.nodes.Document; | ||
import org.jsoup.nodes.Element; | ||
import org.jsoup.select.Elements; | ||
import org.springframework.scheduling.annotation.Scheduled; | ||
import org.springframework.stereotype.Service; | ||
|
||
import java.io.IOException; | ||
import java.util.Objects; | ||
|
||
@Slf4j | ||
@Service | ||
@RequiredArgsConstructor | ||
public class NewsCrawlingService { | ||
private static final String MAIN_URL = "https://news.naver.com/section/"; | ||
private static final String CRON = "0 0 6,12 * * *"; | ||
private static final String ZONE = "Asia/Seoul"; | ||
|
||
private final NewsService newsService; | ||
|
||
@Scheduled(cron = CRON, zone = ZONE) | ||
public void scrap() throws IOException { | ||
for (NewsCategory category : NewsCategory.values()) { | ||
String categoryUrl = MAIN_URL + category.getNum(); | ||
String categoryName = category.getName(); | ||
|
||
scrapNewsUrls(categoryUrl); | ||
for (final News news : newsService.getAllNews()) { | ||
scrapNewsContentsAndUpdate(categoryName, news); | ||
} | ||
} | ||
} | ||
|
||
private void scrapNewsUrls(String categoryUrl) throws IOException { | ||
Document doc = Jsoup.connect(categoryUrl).get(); | ||
Elements newsList = doc.select(".sa_list"); | ||
|
||
for (Element news : newsList.select("li")) { | ||
String thumbnailUrl = scrapThumbnailUrl(news); | ||
String url = Objects.requireNonNull(news.selectFirst(".sa_text_title")).attr("href"); | ||
|
||
newsService.addNews(new NewsUrlDto(url, thumbnailUrl)); | ||
} | ||
} | ||
|
||
private String scrapThumbnailUrl(final Element news) { | ||
try { | ||
Element thumbnailUrlElement = news.selectFirst(".sa_thumb_link img"); | ||
return thumbnailUrlElement != null ? thumbnailUrlElement.attr("src") : null; | ||
} catch (NullPointerException e) { | ||
return null; | ||
} | ||
} | ||
|
||
private void scrapNewsContentsAndUpdate(String categoryName, News news) throws IOException { | ||
Document doc = Jsoup.connect(news.getUrl()).get(); | ||
|
||
String title = scrapTitle(doc); | ||
String content = scrapContent(doc); | ||
String postDate = scrapPostDate(doc); | ||
|
||
news.addNewsBody(title, content, categoryName, postDate); | ||
} | ||
|
||
private String scrapTitle(final Document doc) { | ||
Element titleElement = doc.selectFirst("#ct > div.media_end_head.go_trans > div.media_end_head_title > h2"); | ||
if (titleElement == null) { | ||
titleElement = doc.selectFirst("#content > div.end_ct > div > h2"); | ||
} | ||
if (titleElement != null) { | ||
return titleElement.text(); | ||
} | ||
return null; | ||
} | ||
|
||
private String scrapContent(final Document doc) { | ||
Elements contentElements = doc.select("article#dic_area"); | ||
if (contentElements.isEmpty()) { | ||
contentElements = doc.select("#articeBody"); | ||
} | ||
return contentElements.outerHtml().replaceAll("\\<[^>]*>|\\n", ""); | ||
} | ||
|
||
private String scrapPostDate(final Document doc) { | ||
Element dateElement = doc.selectFirst("div#ct> div.media_end_head.go_trans > div.media_end_head_info.nv_notrans > div.media_end_head_info_datestamp > div > span"); | ||
if (dateElement != null) { | ||
return dateElement.attr("data-date-time"); | ||
} else { | ||
Element altDateElement = doc.selectFirst("#content > div.end_ct > div > div.article_info > span > em"); | ||
if (altDateElement != null) { | ||
return altDateElement.text(); | ||
} | ||
} | ||
return null; | ||
} | ||
} | ||
|
15 changes: 15 additions & 0 deletions
15
backend/src/main/java/com/rollthedice/backend/domain/dto/NewsUrlDto.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
package com.rollthedice.backend.domain.dto; | ||
|
||
import lombok.AllArgsConstructor; | ||
import lombok.Getter; | ||
import lombok.NoArgsConstructor; | ||
import lombok.Setter; | ||
|
||
@Getter | ||
@NoArgsConstructor | ||
@AllArgsConstructor | ||
@Setter | ||
public class NewsUrlDto { | ||
private String url; | ||
private String thumbnailUrl; | ||
} |
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
7 changes: 7 additions & 0 deletions
7
backend/src/main/java/com/rollthedice/backend/domain/news/repository/NewsRepository.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
package com.rollthedice.backend.domain.news.repository; | ||
|
||
import com.rollthedice.backend.domain.news.entity.News; | ||
import org.springframework.data.jpa.repository.JpaRepository; | ||
|
||
public interface NewsRepository extends JpaRepository<News, Long> { | ||
} |
22 changes: 22 additions & 0 deletions
22
backend/src/main/java/com/rollthedice/backend/domain/news/service/NewsCategory.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
package com.rollthedice.backend.domain.news.service; | ||
|
||
import lombok.Getter; | ||
|
||
@Getter | ||
public enum NewsCategory { | ||
POLITICS(100, "정치"), | ||
ECONOMY(101, "경제"), | ||
SOCIETY(102, "사회"), | ||
LIVING(103, "생활/문화"), | ||
WORLD(104, "세계"), | ||
SCIENCE(105, "IT/과학"); | ||
|
||
private final int num; | ||
private final String name; | ||
|
||
NewsCategory(int num, String name) { | ||
this.num = num; | ||
this.name = name; | ||
} | ||
|
||
} |
29 changes: 29 additions & 0 deletions
29
backend/src/main/java/com/rollthedice/backend/domain/news/service/NewsService.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
package com.rollthedice.backend.domain.news.service; | ||
|
||
import com.rollthedice.backend.domain.dto.NewsUrlDto; | ||
import com.rollthedice.backend.domain.news.entity.News; | ||
import com.rollthedice.backend.domain.news.repository.NewsRepository; | ||
import lombok.RequiredArgsConstructor; | ||
import lombok.extern.slf4j.Slf4j; | ||
import org.springframework.stereotype.Service; | ||
import org.springframework.transaction.annotation.Transactional; | ||
|
||
import java.util.List; | ||
|
||
@Slf4j | ||
@Service | ||
@RequiredArgsConstructor | ||
public class NewsService { | ||
private final NewsRepository newsRepository; | ||
|
||
@Transactional | ||
public void addNews(NewsUrlDto dto) { | ||
newsRepository.save(News.builder().thumbnailUrl(dto.getThumbnailUrl()).url(dto.getUrl()).build()); | ||
} | ||
|
||
@Transactional(readOnly = true) | ||
public List<News> getAllNews() { | ||
return newsRepository.findAll(); | ||
} | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
22 changes: 22 additions & 0 deletions
22
iOS/RollTheDice/RollTheDice/Source/View/Chat/ChatType/ChatType.swift
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
// | ||
// ChatType.swift | ||
// RollTheDice | ||
// | ||
// Created by Subeen on 2/14/24. | ||
// | ||
|
||
import Foundation | ||
|
||
/// Chat type: either `ai` or `user`.
///
/// The enum is `String`-backed with no explicit raw values, so each case's raw value is
/// its own identifier.
enum ChatType: String {
    case ai, user

    /// Label of the case: `"ai"` for `.ai`, `"user"` for `.user`.
    var name: String {
        return rawValue
    }
}
Oops, something went wrong.