Skip to content

Commit

Permalink
fix : null 값들에 대해 default 값 insert하도록 수정
Browse files Browse the repository at this point in the history
  • Loading branch information
Yujin-nKim committed Apr 8, 2024
1 parent f9f8c15 commit 74b1dee
Show file tree
Hide file tree
Showing 4 changed files with 142 additions and 105 deletions.
4 changes: 3 additions & 1 deletion mysql/insert_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,21 +78,23 @@ def insert_publisher(cursor, publisher_name):

def insert_book_info(cursor, book_info, publisher_id):
    """Insert one book row into the ``books`` table and return its primary key.

    Args:
        cursor: Database cursor passed to ``execute_query``; its ``lastrowid``
            attribute is read once the statement has run.
        book_info: Mapping that provides ``book_name``, ``book_index``,
            ``book_desc``, ``book_isbn_13``, ``book_price`` and
            ``book_published``.
        publisher_id: Value stored in the ``publisher_id`` column.

    Returns:
        The value of ``cursor.lastrowid`` read right after the insert.
    """
    # One placeholder per column; the statement text is kept in two halves
    # (column list / value list) purely for readability.
    sql = (
        "INSERT INTO books (publisher_id, book_name, book_index, book_desc, "
        "book_isbn_13, book_price, book_discount, book_package, book_published, book_stock) "
        "VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
    )

    # Values are listed in column order. Discount, package and stock do not
    # come from book_info; they are produced by the generate_* helpers.
    row_values = (
        publisher_id,
        book_info['book_name'],
        preprocessing_book_index(book_info['book_index']),
        book_info['book_desc'],
        book_info['book_isbn_13'],
        book_info['book_price'],
        generate_book_discount(),
        generate_book_package(),
        preprocessing_book_published(book_info['book_published']),
        generate_book_stock(),
    )
    execute_query(cursor, sql, row_values)

    new_book_id = cursor.lastrowid
    logger.info(f"책 이름 : '{book_info['book_name']}', Primary key : '{new_book_id}' 데이터가 추가되었습니다.")
    return new_book_id


def insert_book_thumbnail(cursor, book_id, image_url):
"""book_thumbnails 테이블에 썸네일 이미지 데이터를 insert합니다."""
insert_query = "INSERT INTO book_thumbnails (book_id, thumbnail_image_url) VALUES (%s, %s)"
Expand Down
2 changes: 1 addition & 1 deletion openAPI/book_data_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,4 +100,4 @@ def read_csv_to_list(file_path):

conn.commit()
conn.close()
print("MySQL 연결이 닫혔습니다.")
print("MySQL 연결이 닫혔습니다.")
59 changes: 47 additions & 12 deletions openAPI/openAPI_request.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from bs4 import BeautifulSoup
import secret
import logging
import re

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
Expand All @@ -18,15 +19,12 @@ def openAPI_request_search(query, category_id):
'CategoryId': category_id
}

url = secret.aladin_open_api_search_url

response = requests.get(url, params=params)
response = requests.get(secret.aladin_open_api_search_url, params=params)

if response.status_code == 200:
# xml 내용

content = response.text
soup = BeautifulSoup(content, 'xml')

data = soup.find_all("item")

isbn_list = []
Expand All @@ -43,6 +41,11 @@ def openAPI_request_search(query, category_id):
print('Error:', response.status_code)


def remove_image_tags(text):
    """Return ``text`` with every ``<img ...>`` tag removed.

    Only the tags themselves are stripped; all surrounding text is kept.

    Args:
        text: String that may contain HTML ``<img>`` tags.

    Returns:
        A copy of ``text`` without any ``<img>`` tags.
    """
    # \b keeps unrelated tags such as <imgx> intact, [^>]* also accepts a bare
    # <img>, and IGNORECASE covers <IMG ...> since HTML tag names are
    # case-insensitive.
    return re.sub(r'<img\b[^>]*>', '', text, flags=re.IGNORECASE)


def openAPI_request_detail(isbn_id):
"""알라딘 openAPI 도서 상세 요청"""

Expand All @@ -52,43 +55,75 @@ def openAPI_request_detail(isbn_id):
'ItemId': isbn_id
}

url = secret.aladin_open_api_detail_url

response = requests.get(url, params=params)
response = requests.get(secret.aladin_open_api_detail_url, params=params)

if response.status_code == 200:

content = response.text
soup = BeautifulSoup(content, 'xml')

data = soup.find("item")

book_info = {}

# 출판사
publisher = data.find("publisher").get_text()
if not publisher:
publisher = "DefaultPublisherName"

# 책 이름
book_info["book_name"] = data.find("title").get_text()
if not book_info["book_name"]:
book_info["book_name"] = "DefaultBookName"

# 책 목차
book_info["book_index"] = data.find("bookinfo").find("toc").get_text()
if not book_info["book_index"]:
book_info["book_index"] = "DefaultBookIndex"

# 책 설명
book_info["book_desc"] = data.find("description").get_text()
book_info["book_desc"] = remove_image_tags(data.find("description").get_text())
if not book_info["book_desc"]:
book_info["book_desc"] = "DefaultBookDescription"

# 책 ISBN-13
book_info["book_isbn_13"] = data.find("isbn13").get_text()
if not book_info["book_isbn_13"]:
book_info["book_isbn_13"] = "DefaultBookIsbn13"

# 책 가격
book_info["book_price"] = data.find("priceStandard").get_text()
if not book_info["book_price"]:
book_info["book_price"] = "10000"

# 출판일
book_info["book_published"] = data.find("pubDate").get_text()
if not book_info["book_published"]:
book_info["book_published"] = "2024-01-01"

# 책 커버 이미지 URL
book_thumbnail_image_url = data.find("cover").get_text()

# 책 미리보기 이미지 URL들
book_image_list = [img.get_text() for img in data.find("bookinfo").find_all("letslookimg")]
if not book_image_list:
book_image_list = ["no-image"]

# 작가들
author_list = [{"authorType": author.get("authorType"), "desc": author.get("desc"), "name": author.get_text()}
for author in data.find("bookinfo").find("authors").find_all("author")]
author_list = []
for author in data.find("bookinfo").find("authors").find_all("author"):
authorType = author.get("authorType")
desc = author.get("desc")
name = author.get_text()

if not authorType:
authorType = "DefaultAuthorType"
if not desc:
desc = "DefaultAuthorDesc"
if not name:
name = "DefaultAuthorName"

author_info = {"authorType": authorType, "desc": desc, "name": name}
author_list.append(author_info)

# 파싱된 값들을 반환
return {
Expand Down
Loading

0 comments on commit 74b1dee

Please sign in to comment.