
Commit

feat: get more pages
Guovin committed Feb 27, 2024
1 parent cbf6d58 commit 566218b
Showing 2 changed files with 84 additions and 75 deletions.
8 changes: 3 additions & 5 deletions README.md
@@ -6,14 +6,12 @@

- Interface validation: invalid interfaces are filtered out; sorting rules: date, speed, resolution
- Scheduled execution: an update runs every 12 hours
- Focus channels can be configured
- More live source links can be fetched, with a configurable count
- Focus channels keep the template interfaces by default; interfaces can be manually updated into the template
- Paginated interface source updates (todo)
- Focus channels can be configured, with a separate setting for the number of result pages fetched
- Paginated result fetching (configurable page count; see the sketch after this list)
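
A minimal sketch of the paginated fetching described above, using requests and BeautifulSoup for brevity (the project itself drives a stealth Selenium browser); the URL pattern, the `div.tables` / `div.result` / `div.m3u8` selectors, and the stop-when-a-page-has-no-links rule mirror the new `visitPage` loop in this commit, while `fetch_channel_results` and `page_num` are illustrative names only:

```python
import requests
from bs4 import BeautifulSoup

def fetch_channel_results(name: str, page_num: int):
    """Collect (m3u8 url, info text) pairs for one channel across result pages."""
    collected = []
    for page in range(1, page_num):  # same page range as the loop added in this commit
        page_url = f"https://www.foodieguide.com/iptvsearch/?page={page}&s={name}"
        soup = BeautifulSoup(requests.get(page_url, timeout=10).text, "html.parser")
        tables_div = soup.find("div", class_="tables")
        results = tables_div.find_all("div", class_="result") if tables_div else []
        m3u8_divs = [result.find("div", class_="m3u8") for result in results]
        if not any(m3u8_divs):
            break  # a page without any m3u8 entries ends pagination for this channel
        for m3u8_div in m3u8_divs:
            if m3u8_div is None:
                continue
            info_div = m3u8_div.find_next_sibling("div")
            collected.append(
                (m3u8_div.text.strip(), info_div.text.strip() if info_div else None)
            )
    return collected
```

The committed implementation walks the same URLs through Selenium and waits for `div.tables` with WebDriverWait before parsing the page source.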

## Usage

1. Fork this project and enable read/write permissions for the Actions workflow
2. Edit demo.txt; subsequent updates are based on the contents of this file
3. Edit main.py (optional): importantList (focus channels), importantUrlsNum (number of sources to update for focus channels, default 20), filter_invalid_url (whether to filter invalid interfaces, enabled by default)
3. Edit main.py (optional): importantList (list of focus channel names), importantPageNum (number of result pages fetched for focus channels, default: 10), defaultPageNum (number of result pages fetched for regular channels, default: 5), filter_invalid_url (whether to filter invalid interfaces, enabled by default); a sketch of these options follows this list
4. result.txt is the updated live source file; source.json is the data source file
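
For reference, a hedged sketch of how the options from step 3 sit as class attributes in main.py after this commit; the attribute names, default values, and the two channel names come from the diff below, while `pages_for` is just an illustrative helper (not part of the project) showing how the per-channel page count is chosen:

```python
class GetSource:
    # Channels that should get extra result pages (tail of the list shown in the diff)
    importantList = [
        "湖南卫视",
        "翡翠台",
    ]
    importantPageNum = 10      # result pages fetched for focus channels (default 10)
    defaultPageNum = 5         # result pages fetched for regular channels (default 5)
    filter_invalid_url = True  # filter out interfaces that fail validation

    def pages_for(self, name: str) -> int:
        # Mirrors: pageNum = self.importantPageNum if isImportant else self.defaultPageNum
        return self.importantPageNum if name in self.importantList else self.defaultPageNum
```

With these defaults, searches for channels in importantList walk through more result pages than regular channels (importantPageNum versus defaultPageNum).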
151 changes: 81 additions & 70 deletions main.py
@@ -1,16 +1,13 @@
import selenium
from selenium import webdriver
import time
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import os
import re
import requests
from selenium_stealth import stealth
import concurrent.futures
from concurrent.futures import ThreadPoolExecutor
import aiohttp
import asyncio
from selenium.common.exceptions import NoSuchElementException
from bs4 import BeautifulSoup
from datetime import datetime
import re
@@ -34,7 +31,8 @@ class GetSource:
"湖南卫视",
"翡翠台",
]
importantUrlsNum = 20
importantPageNum = 10
defaultPageNum = 5
filter_invalid_url = True

def __init__(self):
@@ -134,75 +132,88 @@ def outputTxt(self, cate, channelUrls):
f.write("\n")

async def visitPage(self, channelItems):
self.driver.get("https://www.foodieguide.com/iptvsearch/")
self.removeFile()
for cate, channelObj in channelItems.items():
channelUrls = {}
for name in channelObj.keys():
try:
element = self.driver.find_element(By.ID, "search")
element.clear()
element.send_keys(name)
self.driver.find_element(By.ID, "form1").find_element(
By.NAME, "Submit"
).click()
except NoSuchElementException:
print(f"Element not found when searching for {name}")
continue
infoList = []
isImportant = name in self.importantList
useNum = self.importantUrlsNum if isImportant else 10
soup = BeautifulSoup(self.driver.page_source, "html.parser")
tables_div = soup.find("div", class_="tables")
results = (
tables_div.find_all("div", class_="result") if tables_div else []
)
for result in results[:useNum]:
m3u8_div = result.find("div", class_="m3u8")
url = m3u8_div.text.strip() if m3u8_div else None
info_div = m3u8_div.find_next_sibling("div") if m3u8_div else None
date = resolution = None
if info_div:
info_text = info_div.text.strip()
date, resolution = (
(
info_text.partition(" ")[0]
if info_text.partition(" ")[0]
else None
),
(
info_text.partition(" ")[2].partition("•")[2]
if info_text.partition(" ")[2].partition("•")[2]
else None
),
pageNum = self.importantPageNum if isImportant else self.defaultPageNum
infoList = []
for page in range(1, pageNum):
try:
page_url = f"https://www.foodieguide.com/iptvsearch/?page={page}&s={name}"
self.driver.get(page_url)
WebDriverWait(self.driver, 10).until(
EC.presence_of_element_located(
(By.CSS_SELECTOR, "div.tables")
)
)
infoList.append((url, date, resolution))
infoList.sort(
key=lambda x: (
x[1] is not None,
datetime.strptime(x[1], "%m-%d-%Y") if x[1] else None,
),
reverse=True,
) # Sort by date
infoList = await self.compareSpeed(infoList) # Sort by speed

def extract_resolution(resolution_str):
numbers = re.findall(r"\d+x\d+", resolution_str)
if numbers:
width, height = map(int, numbers[0].split("x"))
return width * height
else:
return 0

infoList.sort(
key=lambda x: (
x[2] is not None,
extract_resolution(x[2]) if x[2] else 0,
),
reverse=True,
) # Sort by resolution
urls = list(dict.fromkeys(url for url, _, _ in infoList))
channelUrls[name] = urls
soup = BeautifulSoup(self.driver.page_source, "html.parser")
tables_div = soup.find("div", class_="tables")
results = (
tables_div.find_all("div", class_="result")
if tables_div
else []
)
if not any(
result.find("div", class_="m3u8") for result in results
):
break
for result in results:
m3u8_div = result.find("div", class_="m3u8")
url = m3u8_div.text.strip() if m3u8_div else None
info_div = (
m3u8_div.find_next_sibling("div") if m3u8_div else None
)
date = resolution = None
if info_div:
info_text = info_div.text.strip()
date, resolution = (
(
info_text.partition(" ")[0]
if info_text.partition(" ")[0]
else None
),
(
info_text.partition(" ")[2].partition("•")[2]
if info_text.partition(" ")[2].partition("•")[2]
else None
),
)
infoList.append((url, date, resolution))
except Exception as e:
print(f"Error on page {page}: {e}")
continue
try:
infoList.sort(
key=lambda x: (
x[1] is not None,
datetime.strptime(x[1], "%m-%d-%Y") if x[1] else None,
),
reverse=True,
) # Sort by date
infoList = await self.compareSpeed(infoList) # Sort by speed

def extract_resolution(resolution_str):
numbers = re.findall(r"\d+x\d+", resolution_str)
if numbers:
width, height = map(int, numbers[0].split("x"))
return width * height
else:
return 0

infoList.sort(
key=lambda x: (
x[2] is not None,
extract_resolution(x[2]) if x[2] else 0,
),
reverse=True,
) # Sort by resolution
urls = list(dict.fromkeys(url for url, _, _ in infoList))
channelUrls[name] = urls
except Exception as e:
print(f"Error on sorting: {e}")
continue
self.outputTxt(cate, channelUrls)
await asyncio.sleep(1)

