Commit bc485b1
Merge branch 'refs/heads/feature/ISSUE-109' into staging
stopmin committed Jul 17, 2024
2 parents 36afb0b + b5b092d commit bc485b1
Showing 12 changed files with 92 additions and 76 deletions.
2 changes: 1 addition & 1 deletion app/main.py
@@ -4,7 +4,7 @@
from fastapi import FastAPI
from starlette.exceptions import HTTPException

- from app.router.chatbot_article_detail_router import chatbot_article_router
+ from app.router.chatbot_article_router import chatbot_article_router
from app.config.exception_handler import exception_handler, http_exception_handler
from app.config.middlewares.request_response_logging_middle_ware import (
LoggingMiddleware,
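Note on this hunk: only the import path changes, since the router module was renamed from chatbot_article_detail_router to chatbot_article_router. The collapsed part of app/main.py presumably still registers the router in the usual FastAPI way; a minimal sketch, assuming a default APIRouter and no extra prefix:

    from fastapi import FastAPI

    from app.router.chatbot_article_router import chatbot_article_router

    app = FastAPI()
    # With the new route below, this exposes POST /chatbot/article.
    app.include_router(chatbot_article_router)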
6 changes: 0 additions & 6 deletions app/model/crawled_article.py
@@ -24,12 +24,6 @@ class Articles(Base):
category = Column(CHAR(255), nullable=True)
published_at = Column(DateTime, nullable=True)
image_url = Column(String, nullable=True)
- probability_issue_finder = Column(Integer, nullable=True)
- probability_lifestyle_consumer = Column(Integer, nullable=True)
- probability_entertainer = Column(Integer, nullable=True)
- probability_tech_specialist = Column(Integer, nullable=True)
- probability_professionals = Column(Integer, nullable=True)


@event.listens_for(Articles, "before_update", propagate=True)
def update_timestamp(mapper, connection, target): # pylint: disable=unused-argument
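Dropping the five probability_* columns from the Articles model implies a matching schema migration for any existing database. The commit does not show the project's migration tooling, so the following Alembic sketch is an assumption (including the "articles" table name), not part of the change:

    from alembic import op

    def upgrade() -> None:
        # Drop the per-persona probability columns removed from the model.
        for col in (
            "probability_issue_finder",
            "probability_lifestyle_consumer",
            "probability_entertainer",
            "probability_tech_specialist",
            "probability_professionals",
        ):
            op.drop_column("articles", col)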
12 changes: 6 additions & 6 deletions app/model/prompt/simple_article/2024-07-14.txt
@@ -4,18 +4,18 @@
다음은 json 형식의 예시이다:
{{
"title": "MZ세대가 흥미를 끌만한 기사 제목(한국어)",
"content": "기사 본문 (한국어). 단, 경제 기사의 독자층이 경제 지식이 부족한 20대 초반인 것을 고려하여 적당한 이모지를 사용하여 친근하고 간결하게 설명할 것. 문단은 \n으로 구분할 것.",
"content": "기사 본문 (한국어). 단, 경제 기사의 독자층이 경제 지식이 부족한 20대 초반인 것을 고려하여 적당한 이모지를 사용하여 친근하게 재생성할 것. 문단은 \n으로 구분할 것.",
"phrase": {{"어려웠던 경제 표현들" : "어려웠던 경제 표현들을 쉽게 바꾼 문구"}} (예시: {{"환율" : "다른 나라 돈과 우리나라 돈을 교환하는 비율"}}),
"comment": "기사를 보고 추론할 수 있는 것 1문장을 친구에게 설명하는 듯한 표현으로",
"category": "Category 중 하나"
}}

enum Category:
ECONOMY_AND_BUSINESS = "경제 및 기업"
POLITICS_AND_SOCIETY = "정치 및 사회"
TECHNOLOGY_AND_CULTURE = "기술 및 문화"
SPORTS_AND_LEISURE = "스포츠 및 여가"
OPINION_AND_ANALYSIS = "오피니언 및 분석"
ECONOMY_AND_BUSINESS
POLITICS_AND_SOCIETY
TECHNOLOGY_AND_CULTURE
SPORTS_AND_LEISURE
OPINION_AND_ANALYSIS

결과는 json 형식이어야 한다.

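With the enum block reduced to bare member names, the model is expected to return JSON shaped roughly like the following Python dict (illustrative values only; the category field now carries the member name itself, matching MailTypeCategory below):

    expected = {
        "title": "MZ세대가 흥미를 끌만한 기사 제목",
        "content": "기사 본문 ...\n다음 문단 ...",
        "phrase": {"환율": "다른 나라 돈과 우리나라 돈을 교환하는 비율"},
        "comment": "기사를 보고 추론할 수 있는 것 한 문장",
        "category": "ECONOMY_AND_BUSINESS",
    }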
10 changes: 5 additions & 5 deletions app/model/subscription.py
@@ -7,11 +7,11 @@


class MailTypeCategory(Enum):
ECONOMY_AND_BUSINESS = "경제 및 기업"
POLITICS_AND_SOCIETY = "정치 및 사회"
TECHNOLOGY_AND_CULTURE = "기술 및 문화"
SPORTS_AND_LEISURE = "스포츠 및 여가"
OPINION_AND_ANALYSIS = "오피니언 및 분석"
ECONOMY_AND_BUSINESS = "ECONOMY_AND_BUSINESS" # 경제와 비즈니스
POLITICS_AND_SOCIETY = "POLITICS_AND_SOCIETY" # 정치와 사회
TECHNOLOGY_AND_CULTURE = "TECHNOLOGY_AND_CULTURE" # 기술과 문화
SPORTS_AND_LEISURE = "SPORTS_AND_LEISURE" # 스포츠와 여가
OPINION_AND_ANALYSIS = "OPINION_AND_ANALYSIS" # 의견과 분석


class Subscription(Base):
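Since each member's value now equals its name, the category string the prompt asks the model to emit maps back onto the enum without a translation table; a minimal sketch (two members re-declared for brevity):

    from enum import Enum

    class MailTypeCategory(Enum):
        ECONOMY_AND_BUSINESS = "ECONOMY_AND_BUSINESS"
        POLITICS_AND_SOCIETY = "POLITICS_AND_SOCIETY"

    # Name and value coincide, so lookup by name and by value agree:
    assert MailTypeCategory["ECONOMY_AND_BUSINESS"] is MailTypeCategory("ECONOMY_AND_BUSINESS")
    assert MailTypeCategory.ECONOMY_AND_BUSINESS.name == MailTypeCategory.ECONOMY_AND_BUSINESS.value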
26 changes: 12 additions & 14 deletions app/rag_lang_chain/langchain_applied.py
@@ -6,6 +6,7 @@
from langchain.schema import Document
from langchain_core.messages import HumanMessage
from langchain_openai import ChatOpenAI
+ from pydantic.v1 import BaseModel

from app.config.loguru_config import logger
from app.rag_lang_chain.chromadb_manager import ChromaDBManager
@@ -15,9 +16,9 @@
)


- class RagAppliedResult:
+ class RagAppliedResult(BaseModel):
      result_text: str
-     related_documents: List[Union[Document, dict]]
+     related_documents: List[Document]


async def request_rag_applied_openai(
@@ -34,11 +35,7 @@ async def request_rag_applied_openai(
)

# Step 1: collect related information using the Google Custom Search API
- google_results = await google_cse_retriever.retrieve(
-     original_text
- )  # FIXME: why use GoogleCSERetriever here?  # pylint: disable=fixme
- if not google_results:
-     raise HTTPException(status_code=404, detail="No results found from Google.")
+ google_results = await google_cse_retriever.retrieve(original_text)

# Step 2: vectorize the search results and store them in ChromaDB
chroma_db_manager = ChromaDBManager()
@@ -49,14 +46,17 @@
additional_info = await search.aget_relevant_documents(original_text, num_results=3)

# Step 4: build the prompt (original text + search results + additional info)
- rag_applied_prompt = await create_rag_applied_prompt(
-     original_prompt=system_prompt, relevant_info=search_results + additional_info
- )
+ rag_applied_prompt = system_prompt
+ if search_results:
+     rag_applied_prompt = await create_rag_applied_prompt(
+         original_prompt=system_prompt,
+         relevant_info=search_results + additional_info,
+     )

# Step 5: return the OpenAI response
try:
search_llm = ChatOpenAI(
temperature=0, model="gpt-4", max_tokens=1500, api_key=openai_api_key
temperature=0, model="gpt-4o", max_tokens=1500, api_key=openai_api_key
)
response = await search_llm.agenerate(
messages=[[HumanMessage(rag_applied_prompt)]]
@@ -70,11 +70,9 @@

logger.info(f"Response: {response.generations[0][0].text}")

- # response.generations[0][0].text

return RagAppliedResult(
result_text=response.generations[0][0].text,
-     related_documents=search_results + additional_info,
+     related_documents=search_results,
)


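Two behavioral notes on this file: RagAppliedResult becomes a validated pydantic (v1-compat) model holding only Document objects, and an empty Google result no longer raises a 404; the code now falls back to the plain system prompt. A minimal sketch of the new result type, assuming langchain's Document (itself a pydantic v1 model, so field validation works out of the box):

    from typing import List

    from langchain.schema import Document
    from pydantic.v1 import BaseModel

    class RagAppliedResult(BaseModel):
        result_text: str
        related_documents: List[Document]

    result = RagAppliedResult(
        result_text="generated summary",
        related_documents=[Document(page_content="snippet", metadata={"title": "t"})],
    )
    print(result.related_documents[0].metadata["title"])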
Empty file added app/recommend/__init__.py
16 changes: 0 additions & 16 deletions app/repository/crawled_article_crud.py
@@ -21,22 +21,6 @@ async def get(self, pk: int, session: AsyncSession):
)
return article

- async def set_interest_type(
-     self, pk: int, interest_types: List[int], session: AsyncSession
- ):
-     repository = get_repository(Articles)(session)
-     return await repository.update_by_pk(
-         pk=pk,
-         data={
-             'probability_issue_finder': interest_types[0],
-             'probability_lifestyle_consumer': interest_types[1],
-             'probability_entertainer': interest_types[2],
-             'probability_tech_specialist': interest_types[3],
-             'probability_professionals': interest_types[4]
-         }
-     )

async def get_all(self, session: AsyncSession):
repository = get_repository(Articles)(session)
return await repository.filter()
@@ -1,26 +1,30 @@
- from fastapi import APIRouter
+ from fastapi import APIRouter, Depends
from pydantic import BaseModel

- from app.service.chatbot_article_detail_service import request_rag_applied_openai
+ from sqlalchemy.ext.asyncio import AsyncSession
+ from app.database.session import get_db_session
+ from app.service.chatbot_article_service import request_rag_applied_openai
from app.utils.generic_response import GenericResponseDTO

chatbot_article_router = APIRouter()

+ # User request
class GenerateDetailArticleRequestDTO(BaseModel):
-     news_content: str
+     id: int
prompt: str


@chatbot_article_router.post(
"/chatbot-article-detail", response_model=GenericResponseDTO
"/chatbot/article", response_model=GenericResponseDTO
)
async def chatbot_article_detail_(
request: GenerateDetailArticleRequestDTO,
+     session: AsyncSession = Depends(get_db_session)
):
rag_applied_result = await request_rag_applied_openai(
-     original_text=request.news_content,
-     system_prompt=request.prompt
+     news_id=request.id,
+     system_prompt=request.prompt,
+     session=session
)

return GenericResponseDTO(
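The route moves from /chatbot-article-detail to /chatbot/article, and the body now carries the article's database id plus a prompt instead of raw news content. A hypothetical client call (host, port, and the absence of a router prefix are assumptions; the field names come from the DTO above):

    import httpx

    payload = {"id": 42, "prompt": "경제 기사를 쉽게 설명해줘"}  # id: Articles primary key
    resp = httpx.post("http://localhost:8000/chatbot/article", json=payload, timeout=60.0)
    print(resp.json())  # GenericResponseDTO-shaped body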
8 changes: 5 additions & 3 deletions app/service/article_manage_service.py
@@ -1,11 +1,14 @@
from datetime import datetime
from typing import List

from sqlalchemy.ext.asyncio import AsyncSession

from app.model.article_publisher import Publisher
from app.model.crawled_article import Articles
from app.model.subscription import MailTypeCategory
from app.repository.crawled_article_crud import CrawledArticleRepository


class ArticleManageService:
async def create_article(
self,
@@ -18,7 +21,7 @@ async def create_article(
phrase: dict,
comment: str,
image_url: str,
-     published_at: str,
+     published_at: datetime,
category: MailTypeCategory,
session: AsyncSession,
) -> Articles:
@@ -31,11 +34,10 @@ async def create_article(
simple_title=simple_title,
simple_content=simple_content,
comment=comment,
-     published_at=datetime.strptime(published_at, '%Y-%m-%dT%H:%M:%S'),
+     published_at=published_at,
image_url=image_url,
category=category.name,
phrase=phrase,
-     probability_issue_finder=-1
),
session=session,
)
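create_article now takes an already-parsed datetime, so string parsing moves to the caller side, and the hard-coded probability_issue_finder=-1 default is gone along with the column. A sketch of what a call site would now do (the actual callers are outside this hunk):

    from datetime import datetime

    # Parse at the boundary; create_article no longer accepts raw strings.
    raw = "2024-07-14T09:00:00"
    published_at = datetime.strptime(raw, "%Y-%m-%dT%H:%M:%S")
    # ...then pass the datetime straight through:
    # await ArticleManageService().create_article(published_at=published_at, ...)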
@@ -3,14 +3,17 @@
import aiohttp
from fastapi import HTTPException
from langchain_core.messages import HumanMessage
+ from langchain_core.outputs import LLMResult
from langchain_openai import ChatOpenAI
+ from sqlalchemy.ext.asyncio import AsyncSession

from app.config.loguru_config import logger
from app.rag_lang_chain.chromadb_manager import ChromaDBManager
from app.rag_lang_chain.google_cse_retriver import (
AsyncGoogleSearchAPIWrapper,
GoogleCSERetriever,
)
+ from app.service.article_manage_service import ArticleManageService

from langchain.schema import Document

@@ -40,8 +43,9 @@ def to_dict(self) -> Dict:


async def request_rag_applied_openai(
-     original_text: str,  # OriginalText: the original article (sent to Google Custom Search)
-     system_prompt: str,  # SystemPrompt: the system prompt
+     news_id: int,
+     system_prompt: str,
+     session: AsyncSession
) -> Dict:
openai_api_key = os.getenv("OPENAI_API_KEY")
google_api_key = os.getenv("GOOGLE_API_KEY")
@@ -52,14 +56,26 @@
api_key=google_api_key, cse_id=google_cse_id
)

+ # Step 0: fetch the original article text for the given article id
+ article_service = ArticleManageService()
+ article_by_id = await article_service.get_article_by_id(news_id, session)
+ original_text = article_by_id.content
+ if not original_text:
+     raise HTTPException(status_code=404, detail="Article not found.")


# Step 1: use the Google Custom Search API to collect all information related to original_text
# Returns a list of web pages related to original_text; each page consists of a title (result title), link (page URL), and snippet (result summary).
google_results = await google_cse_retriever.retrieve(
original_text
)
if not google_results:
raise HTTPException(status_code=404, detail="No results found from Google.")
logger.info(f"1. Google results: {google_results}")
+ if not original_text:
+     response = await openai_response(openai_api_key, system_prompt)
+     return RagAppliedResult(
+         result_text=response.generations[0][0].text,
+         related_documents=[],
+     ).to_dict()

# Step 2: vectorize the search results and store them in ChromaDB
chroma_db_manager = ChromaDBManager()
@@ -71,38 +87,51 @@
additional_info = await search.aget_relevant_documents(original_text, num_results=3)
logger.info(f"3. Additional info: {additional_info}")

- # Step 4: build the prompt(original text + search results + additional info)
+ # Step 4: build the prompt (original text + search results + additional info)
  rag_applied_prompt = await create_rag_applied_prompt(
-     original_prompt=system_prompt, relevant_info=search_results + additional_info
+     original_prompt=system_prompt, relevant_info=search_results + additional_info, original_text=original_text
)

# Step 5: return the OpenAI response
+ response = await openai_response(openai_api_key, rag_applied_prompt)

logger.info(f"최종 Response: {response}")

+ return RagAppliedResult(
+     result_text=response.generations[0][0].text,
+     related_documents=search_results + additional_info,
+ ).to_dict()


+ # Return the result of the OpenAI request
+ async def openai_response(
+     openai_api_key: str,
+     prompt: str
+ ) -> LLMResult:
try:
search_llm = ChatOpenAI(
temperature=0, model="gpt-4", max_tokens=1500, api_key=openai_api_key
)
response = await search_llm.agenerate(
-         messages=[[HumanMessage(rag_applied_prompt)]]
+         messages=[[HumanMessage(prompt)]]
)
+     return response
except aiohttp.ClientResponseError as e:
if e.status == 429:
raise HTTPException(
429, "Too many requests. Please try again later."
) from e
raise HTTPException(500, "Internal Server Error") from e

logger.info(f"최종 Response: {response}")

- return RagAppliedResult(
-     result_text=response.generations[0][0].text,
-     related_documents=search_results + additional_info,
- ).to_dict()


async def create_rag_applied_prompt(
-     original_prompt: str, relevant_info: List[Union[Document, dict]]
+     original_prompt: str, relevant_info: List[Union[Document, dict]], original_text: str
) -> str:
+ # Add the original article
+ original_prompt += f"\n원문 기사:\n{original_text}\n\n"

# Add related information
for idx, info in enumerate(relevant_info):
if isinstance(info, Document):
title = info.metadata.get("title", "제목 없음")
Expand All @@ -113,7 +142,7 @@ async def create_rag_applied_prompt(
link = info.get("link", "URL 없음")
snippet = info.get("snippet", "내용 없음")
original_prompt += (
f"\n{idx + 1}. 제목: {title}\n URL: {link}\n 내용: {snippet}\n"
f"관련된 기사 정보들 \n{idx + 1}. 제목: {title}\n URL: {link}\n 내용: {snippet}\n"
)

logger.info(f"RAG Applied Prompt: {original_prompt}")
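After these changes the final prompt is assembled as: the system prompt, then the article body under a 원문 기사 header, then one numbered 제목/URL/내용 block per related document. Roughly, as inferred from the string concatenations above (placeholders in angle brackets):

    # Rough shape of the assembled RAG prompt (inferred from the code above).
    rag_applied_prompt = (
        "<system prompt>"
        "\n원문 기사:\n<article body>\n\n"
        "관련된 기사 정보들 \n1. 제목: <title>\n URL: <link>\n 내용: <snippet>\n"
        "관련된 기사 정보들 \n2. 제목: <title>\n URL: <link>\n 내용: <snippet>\n"
    )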
2 changes: 1 addition & 1 deletion app/service/news_scheduling_service.py
@@ -8,6 +8,7 @@
from app.config.loguru_config import logger
from app.database.session import db_session
from app.model.article_publisher import Publisher
+ from app.recommend.recommend_service import RecommendService
from app.service.article_manage_service import ArticleManageService
from app.service.simple_article_service import process_generate_article_by_url

@@ -87,7 +88,6 @@ async def run_crawl_and_store(session: AsyncSession):
)



async def schedule_task():
while True:
now = datetime.now()