Skip to content

Commit

Permalink
feat: Update code to V3.0
Browse files Browse the repository at this point in the history
1. Fix code syntax errors.
2. Modify the code layout.
3. Delete unnecessary files.
  • Loading branch information
mxdabc committed Oct 3, 2024
1 parent 77e92d7 commit 8ffe311
Show file tree
Hide file tree
Showing 44 changed files with 2,510 additions and 2,484 deletions.
339 changes: 219 additions & 120 deletions crawl/crawl.py

Large diffs are not rendered by default.

223 changes: 111 additions & 112 deletions crawl/spiders/__init__.py
Original file line number Diff line number Diff line change
@@ -1,123 +1,122 @@
#package spider
#__init__.py
# package spider
# __init__.py

from crawl.spiders.cctv import get_epgs_cctv,get_channels_cctv
from crawl.spiders.tbc import get_epgs_tbc,get_channels_tbc
from crawl.spiders.tvmao import get_epgs_tvmao2,get_channels_tvmao
from crawl.spiders.zhongshu import get_epgs_zhongshu,get_channels_zhongshu
from crawl.spiders.cabletv import get_epgs_cabletv,get_channels_cabletv
from crawl.spiders.g4tv import get_epgs_4gtv,get_channels_4gtv
from crawl.spiders.mod import get_epgs_mod,get_channels_mod
from crawl.spiders.tvb import get_epgs_tvb,get_channels_tvb
from crawl.spiders.nowtv import get_epgs_nowtv,get_channels_nowtv
from crawl.spiders.gdtv import get_epgs_gdtv,get_channels_gdtv
from crawl.spiders.icable import get_epgs_icable,get_channels_icable
from crawl.spiders.btv import get_epgs_btv,get_channels_btv
from crawl.spiders.tvsou import get_epgs_tvsou,get_channels_tvsou
from crawl.spiders.hks import get_epgs_hks,get_channels_hks
from crawl.spiders.viu import get_epgs_viu,get_channels_viu
from crawl.spiders.chuanliu import get_channels_chuanliu,get_epgs_chuanliu
from crawl.spiders.mytvsuper import get_epgs_mytvsuper,get_channels_mytvsuper
from crawl.spiders.gxntv import get_epgs_gxntv,get_channels_gxntv
from crawl.spiders.cctv import get_epgs_cctv, get_channels_cctv
from crawl.spiders.tbc import get_epgs_tbc, get_channels_tbc
from crawl.spiders.tvmao import get_epgs_tvmao2, get_channels_tvmao
from crawl.spiders.zhongshu import get_epgs_zhongshu, get_channels_zhongshu
from crawl.spiders.cabletv import get_epgs_cabletv, get_channels_cabletv
from crawl.spiders.g4tv import get_epgs_4gtv, get_channels_4gtv
from crawl.spiders.mod import get_epgs_mod, get_channels_mod
from crawl.spiders.tvb import get_epgs_tvb, get_channels_tvb
from crawl.spiders.nowtv import get_epgs_nowtv, get_channels_nowtv
from crawl.spiders.gdtv import get_epgs_gdtv, get_channels_gdtv
from crawl.spiders.icable import get_epgs_icable, get_channels_icable
from crawl.spiders.btv import get_epgs_btv, get_channels_btv
from crawl.spiders.tvsou import get_epgs_tvsou, get_channels_tvsou
from crawl.spiders.hks import get_epgs_hks, get_channels_hks
from crawl.spiders.viu import get_epgs_viu, get_channels_viu
from crawl.spiders.chuanliu import get_channels_chuanliu, get_epgs_chuanliu
from crawl.spiders.mytvsuper import get_epgs_mytvsuper, get_channels_mytvsuper
from crawl.spiders.gxntv import get_epgs_gxntv, get_channels_gxntv
from utils.general import chuanliu_Authorization
from crawl.spiders.sdtv import get_epgs_sdtv,get_channels_sdtv
from crawl.spiders.sdtv import get_epgs_sdtv, get_channels_sdtv

# Dispatch table of all EPG interfaces: source name -> EPG function.
# epg_func() looks a source up here and calls the result as
# f(channel, id, dt, func_arg).
epg_funcs = {
    "tvmao": get_epgs_tvmao2,
    "tbc": get_epgs_tbc,
    "cctv": get_epgs_cctv,
    "zhongshu": get_epgs_zhongshu,
    "cabletv": get_epgs_cabletv,
    "tvsou": get_epgs_tvsou,
    "4gtv": get_epgs_4gtv,
    "mod": get_epgs_mod,
    "tvb": get_epgs_tvb,
    "nowtv": get_epgs_nowtv,
    "icable": get_epgs_icable,
    "gdtv": get_epgs_gdtv,
    "btv": get_epgs_btv,
    "hks": get_epgs_hks,
    "viu": get_epgs_viu,
    "chuanliu": get_epgs_chuanliu,
    "mytvsuper": get_epgs_mytvsuper,
    "gxntv": get_epgs_gxntv,
    "sdtv": get_epgs_sdtv,
}
# Source name -> that source's get_channels_* function, using the same keys
# as epg_funcs.
# NOTE(review): presumably used to (re)build each source's channel list;
# the callers are outside this file -- confirm.
epg_source = {
    "tvmao": get_channels_tvmao,
    "tbc": get_channels_tbc,
    "cctv": get_channels_cctv,
    "zhongshu": get_channels_zhongshu,
    "cabletv": get_channels_cabletv,
    "tvsou": get_channels_tvsou,
    "4gtv": get_channels_4gtv,
    "mod": get_channels_mod,
    "tvb": get_channels_tvb,
    "nowtv": get_channels_nowtv,
    "icable": get_channels_icable,
    "gdtv": get_channels_gdtv,
    "btv": get_channels_btv,
    "hks": get_channels_hks,
    "viu": get_channels_viu,
    "chuanliu": get_channels_chuanliu,
    "mytvsuper": get_channels_mytvsuper,
    "gxntv": get_channels_gxntv,
    "sdtv": get_channels_sdtv,
}
# Fourth positional argument that epg_func() passes to each source's EPG
# function. Every source gets 0 except "chuanliu", which receives
# chuanliu_Authorization.
func_args = {
    "tvmao": 0,
    "tbc": 0,
    "cctv": 0,
    "zhongshu": 0,
    "cabletv": 0,
    "tvsou": 0,
    "4gtv": 0,
    "mod": 0,
    "tvb": 0,
    "nowtv": 0,
    "icable": 0,
    "gdtv": 0,
    "btv": 0,
    "hks": 0,
    "viu": 0,
    "chuanliu": chuanliu_Authorization,
    "mytvsuper": 0,
    "gxntv": 0,
    "sdtv": 0,
}


def epg_func(channel, id, dt, func_arg=0, source=0):
    """Dispatch an EPG request to the function registered for a source.

    Args:
        channel: Object forwarded to the EPG function; its ``source``
            attribute selects the source when ``source`` is falsy.
        id: Channel identifier, forwarded unchanged.
        dt: Requested date, forwarded unchanged.
        func_arg: Accepted for interface compatibility but ignored: the
            value is always replaced by ``func_args[<source>]``.
        source: Optional source key; when truthy it takes precedence over
            ``channel.source``.

    Returns:
        Whatever the selected EPG function returns.

    Raises:
        KeyError: If the resolved source is missing from ``func_args`` or
            ``epg_funcs``.
    """
    source_name = source if source else channel.source
    # The caller-supplied func_arg is overridden unconditionally; the
    # conditional that would have honoured it was commented out upstream
    # ("if func_arg else func_arg"), so that behaviour is kept as is.
    func_arg = func_args[source_name]
    return epg_funcs[source_name](channel, id, dt, func_arg)


# Names exported by ``from crawl.spiders import *``: the per-source
# get_epgs_* functions plus the EPG dispatch helpers. The get_channels_*
# functions and ``epg_source`` are not listed here.
__all__ = [
    "get_epgs_4gtv",
    "get_epgs_btv",
    "get_epgs_cabletv",
    "get_epgs_cctv",
    "get_epgs_gdtv",
    "get_epgs_icable",
    "get_epgs_mod",
    "get_epgs_nowtv",
    "get_epgs_tbc",
    "get_epgs_tvb",
    "get_epgs_tvmao2",
    "get_epgs_zhongshu",
    "get_epgs_tvsou",
    "get_epgs_hks",
    "get_epgs_viu",
    "get_epgs_chuanliu",
    "get_epgs_mytvsuper",
    "get_epgs_gxntv",
    "get_epgs_sdtv",
    "epg_funcs",
    "func_args",
    "epg_func",
]
113 changes: 65 additions & 48 deletions crawl/spiders/btv.py
Original file line number Diff line number Diff line change
@@ -1,87 +1,104 @@
# -*- coding:utf-8 -*-
# 北京电视台官方来源 10 个频道
#2022-11-03官方更改接口
#https://dynamic.rbc.cn/bvradio_app/service/LIVE?functionName=getCurrentChannel&channelId=135&curdate=2022-11-01
# 2022-11-03官方更改接口
# https://dynamic.rbc.cn/bvradio_app/service/LIVE?functionName=getCurrentChannel&channelId=135&curdate=2022-11-01
# http://jiemudan.brtv.org.cn/index.html?channel=TvCh1602660467213184 地址
# http://www.brtv.org.cn/mobileinf/rest/cctv/videolivelist/dayWeb?json={'id':'TvCh1602660467213184','day':'2021-12-15'} 接口
from bs4 import BeautifulSoup as bs
import requests, datetime,os
import requests, datetime, os
from utils.general import headers


def get_epgs_btv(channel, channel_id, dt, func_arg):
    """Fetch one day's programme list for a channel from the rbc.cn API.

    Args:
        channel: Object whose ``id`` attribute is stored in every returned
            entry under ``"channel_id"``.
        channel_id: Identifier substituted into the request URL.
        dt: Date to fetch; must support ``strftime``. It is also stored as
            each entry's ``"program_date"``.
        func_arg: Not used by this function.

    Returns:
        dict with keys ``"success"`` (1, or 0 if any exception occurred),
        ``"epgs"`` (entries parsed before any failure), ``"msg"`` (error
        text, empty on success), ``"last_program_date"`` (``dt``) and
        ``"ban"`` (always 0).
    """
    epgs = []
    msg = ""
    success = 1
    need_date = dt.strftime("%Y-%m-%d")
    time_format = "%Y-%m-%d%H:%M"
    url = (
        "https://dynamic.rbc.cn/bvradio_app/service/LIVE?functionName=getCurrentChannel&channelId=%s&curdate=%s"
        % (channel_id, need_date)
    )

    try:
        res = requests.get(url, headers=headers, timeout=5)
        res.encoding = "utf-8"
        programmes = res.json()["channel"]["programes"]
        last_position = len(programmes)
        for position, programme in enumerate(programmes, start=1):
            start_text = programme["startTime"]
            end_text = programme["endTime"]
            title = programme["name"]
            starttime = datetime.datetime.strptime(
                need_date + start_text, time_format
            )
            endtime = datetime.datetime.strptime(need_date + end_text, time_format)
            # The API returns clock times only. The last programme of the
            # day may run past midnight: roll its end over to the next day
            # when it ends in the 00:xx hour (original rule) or when the
            # end would otherwise fall before the start (e.g. 23:30-01:30).
            if position == last_position and (
                end_text[:2] == "00" or endtime < starttime
            ):
                endtime += datetime.timedelta(days=1)
            epgs.append(
                {
                    "channel_id": channel.id,
                    "starttime": starttime,
                    "endtime": endtime,
                    "title": title,
                    "desc": "",
                    "program_date": dt,
                }
            )
    except Exception as e:
        # Best-effort spider: report the failure in the result instead of
        # raising, keeping whatever entries were parsed before the error.
        success = 0
        spidername = os.path.basename(__file__).split(".")[0]
        msg = "spider-%s- %s" % (spidername, e)
    return {
        "success": success,
        "epgs": epgs,
        "msg": msg,
        "last_program_date": dt,
        "ban": 0,
    }


def get_channels_btv():
    """Scrape the BRTV schedule page and return its channel list.

    Returns:
        list of dicts with keys ``"name"``, ``"id"`` (one-element list
        holding the ``channelid`` attribute), ``"url"``, ``"source"``
        (``"btv"``), ``"logo"``, ``"desc"`` and ``"sort"``.

    Raises:
        requests.RequestException: If the page cannot be fetched, including
            when the 5 second timeout expires.
    """
    channels = []
    url = "https://www.brtv.org.cn/gbdsb.shtml"
    # Timeout matches get_epgs_btv so a stalled server cannot hang the crawl.
    res = requests.get(url, headers=headers, timeout=5)
    res.encoding = "utf-8"
    soup = bs(res.text, "html.parser")
    for li in soup.select("div.conWrapper > div.templateBox > ul > li"):
        channel_id = li.attrs["channelid"]
        channels.append(
            {
                "name": li.div.text.replace("\n", "").strip(),
                "id": [channel_id],
                "url": url,
                "source": "btv",
                "logo": "",
                "desc": "",
                "sort": "北京",
            }
        )

    # Introductions are matched to channels purely by order of appearance.
    n = 0
    for info in soup.select("div.introductionWrapper"):
        desc = (
            info.text.strip()
            .replace(" ", "")
            .replace("\t", "")
            .replace("\r", "")
            .replace("\n\n", "\n")
            .replace("\n\n", "\n")
        )
        # NOTE(review): the "青年频道" introduction is skipped, presumably
        # because it has no matching <li> in the channel list -- confirm.
        if "青年频道" in desc:
            continue
        if n >= len(channels):
            # More introductions than channels: stop instead of raising
            # IndexError; descriptions are auxiliary data.
            break
        channels[n]["desc"] = desc
        n += 1
    return channels
Loading

0 comments on commit 8ffe311

Please sign in to comment.