From 56098236cba5ac4c4592bfd56550e3fd7ad45e69 Mon Sep 17 00:00:00 2001 From: somsom13 Date: Sat, 21 May 2022 01:29:19 +0900 Subject: [PATCH] =?UTF-8?q?=F0=9F=93=9D=20add:=203=EB=B0=A9=EC=86=A1?= =?UTF-8?q?=EC=82=AC=20=ED=94=84=EB=A1=9C=EA=B7=B8=EB=9E=A8=20=ED=81=AC?= =?UTF-8?q?=EB=A1=A4=EB=A7=81=20=EA=B5=AC=ED=98=84=20(#25)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../backend/video/service/ProgramCrawler.java | 147 ++++++++++++++++++ 1 file changed, 147 insertions(+) create mode 100644 src/main/java/com/example/backend/video/service/ProgramCrawler.java diff --git a/src/main/java/com/example/backend/video/service/ProgramCrawler.java b/src/main/java/com/example/backend/video/service/ProgramCrawler.java new file mode 100644 index 0000000..816b426 --- /dev/null +++ b/src/main/java/com/example/backend/video/service/ProgramCrawler.java @@ -0,0 +1,147 @@ +package com.example.backend.video.service; + +import com.nimbusds.oauth2.sdk.util.date.SimpleDate; +import org.json.JSONArray; +import org.json.JSONObject; +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.springframework.stereotype.Service; + +import java.io.IOException; +import java.text.SimpleDateFormat; +import java.util.ArrayList; +import java.util.Date; +import java.util.List; + +/** Crawler for the program listings of three broadcasters; this file defines kbs(), sbs() and mbc(int). */ +@Service +public class ProgramCrawler { + + /** Manual entry point: prints a banner and runs kbs(), sbs() and mbc(2) in that order; their return values are discarded. */ + public static void main(String[] args){ + + System.out.println("------------------KBS-----------------"); + kbs(); + System.out.println("------------------SBS-----------------"); + sbs(); + System.out.println("------------------MBC-----------------"); + mbc(2); //2: currently airing / 3: ended + + } + + + /** Requests the SBS section API (sort=new, limit=30 per request, offset starting at 0); the remainder of this method lies beyond this line. */ + private static List sbs(){ + String url1="https://apis.sbs.co.kr/main-api/section/tv?pgm_sct=ET&sort=new&offset="; + String url2="&limit=30"; + int offset=0; + Document document = null; + List sbsProgram=new ArrayList<>(); + while(true){ + String url=url1+offset+url2; + try { + String html= 
Jsoup.connect(url). + ignoreContentType(true) + .execute().body(); + if(!html.contains("title")) + break; + JSONArray array = new JSONArray(html); + + for(int i=0;i mbc(int state){ + String url1="https://control.imbc.com/TV/Program?callback=Program_2_1_1_3_2_0_0_0_"; + String url2="&subCategoryId=2&curPage="; + String url3="&pageSize=100&order=3&broadState="+state+"&endYear=0&initial=&genre=0"; + int page=1; + SimpleDateFormat format=new SimpleDateFormat("yyyyMMdd"); + String date= format.format(new Date()); + Document document = null; + List mbcProgram=new ArrayList<>(); + while(true){ + String url=url1+date+url2+page+url3; + try { + String html= Jsoup.connect(url). + ignoreContentType(true) + .execute().body(); + + if(!html.contains("Title")) + break; + + List head = findIndexes("(", html); + List tail = findIndexes(")", html); + + String parsed=html.substring(head.get(0)+1,tail.get(tail.size()-1)); + JSONObject jsnobject = new JSONObject(parsed); + + JSONArray jsonArray = jsnobject.getJSONArray("List"); + for (int i = 0; i < jsonArray.length(); i++) { + JSONObject jsonObject = jsonArray.getJSONObject(i); + String title=jsonObject.getString("Title"); + System.out.println("title = " + title); + mbcProgram.add(title); + } + page++; + + } catch (IOException e) { + e.printStackTrace(); + break; + } + } + return mbcProgram; + } + + private static List kbs(){ + String url1="https://pprogramapi.kbs.co.kr/api/v1/external/program?end_yn=n§ion_code=04&page="; + String url2="&page_size=12&rtype=jsonp&show_yn=Y&sort_option=rdatetime%20desc&dict=Y&callback=section3"; + int page=1; + Document document; + List kbsProgram=new ArrayList<>(); + while(true){ + String url=url1+page+url2; + try { + document = Jsoup.connect(url).get(); + Element body = document.body(); + String html = body.html(); + //title + 3 + 13 / program_day_of_week - 3 + List title = findIndexes("program_title", html); + List program_day_of_week = findIndexes("official_sns_instagram", html); + 
if(title.isEmpty()) + break; + for(int i=0;i