Commit
Merge branch 'refs/heads/master' into gd
# Conflicts:
#	result.log
#	result.txt
Guovin committed May 7, 2024
2 parents 14a80e8 + 0b4518c commit a531fd3
Showing 6 changed files with 77 additions and 36 deletions.
1 change: 0 additions & 1 deletion .gitignore
@@ -1 +0,0 @@
*.log
14 changes: 14 additions & 0 deletions CHANGELOG.md
@@ -1,5 +1,19 @@
# Changelog

## v1.1.2

### 2024/5/7

- Refactored the interface fetching method, improving its generality and adapting to structural changes
- Fixed the automatic update issue on the gd branch (#105)
- Optimized fetching of custom interface sources and de-duplicated interfaces

## v1.1.1

### 2024/4/29

- To avoid code merge conflicts, the master branch no longer runs the update workflow; master is now used only for releasing new features. If you are using my link, please switch to the gd branch

## v1.1.0

### 2024/4/26
2 changes: 1 addition & 1 deletion demo.txt
@@ -1,4 +1,4 @@
广东频道,#genre#
广东频道,#genre#
广东珠江,http://113.86.204.95:9999/udp/239.77.0.1:5146
开平综合,http://php.jdshipin.com:8880/chudian.php?id=kpzh
开平生活,http://php.jdshipin.com:8880/chudian.php?id=kpsh
43 changes: 32 additions & 11 deletions main.py
@@ -8,12 +8,13 @@
from selenium.webdriver.support import expected_conditions as EC
from selenium_stealth import stealth
import asyncio
from bs4 import BeautifulSoup
from bs4 import BeautifulSoup, NavigableString
from utils import (
getChannelItems,
updateChannelUrlsTxt,
updateFile,
getUrlInfo,
getChannelUrl,
getChannelInfo,
sortUrlsBySpeedAndResolution,
getTotalUrls,
filterUrlsByPatterns,
Expand All @@ -25,6 +26,7 @@
from logging.handlers import RotatingFileHandler
import os
from tqdm import tqdm
import re

handler = RotatingFileHandler("result_new.log", encoding="utf-8")
logging.basicConfig(
@@ -114,18 +116,37 @@ async def visitPage(self, channelItems):
self.driver.execute_script(
"arguments[0].click();", page_link
)
soup = BeautifulSoup(self.driver.page_source, "html.parser")
results = (
soup.find_all("div", class_="result") if soup else []
source = re.sub(
r"<!--.*?-->",
"",
self.driver.page_source,
flags=re.DOTALL,
)
for result in results:
try:
url, date, resolution = getUrlInfo(result)
soup = BeautifulSoup(source, "html.parser")
if soup:
results = []
for element in soup.descendants:
if isinstance(element, NavigableString):
url = getChannelUrl(element)
if url and not any(
item[0] == url for item in results
):
url_element = soup.find(
lambda tag: tag.get_text(strip=True)
== url
)
if url_element:
info_element = (
url_element.find_next_sibling()
)
date, resolution = getChannelInfo(
info_element
)
results.append((url, date, resolution))
for result in results:
url, date, resolution = result
if url and checkUrlByPatterns(url):
infoList.append((url, date, resolution))
except Exception as e:
print(f"Error on result {result}: {e}")
continue
except Exception as e:
print(f"Error on page {page}: {e}")
continue
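For orientation, here is a minimal, self-contained sketch of the parsing flow this hunk introduces (the sample HTML is assumed for illustration, not taken from a real result page): HTML comments are stripped first, every text node is scanned for a channel URL, duplicates are skipped, and the date/resolution are read from the element that follows the one holding the URL.

```python
import re

from bs4 import BeautifulSoup, NavigableString

URL_REGEX = (
    r"http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]"
    r"|(?:%[0-9a-fA-F][0-9a-fA-F]))+"
)

# Assumed sample markup; the real page structure may differ.
page_source = """
<!-- decoy markup inside a comment is ignored -->
<div class="result">
  <div>广东珠江</div>
  <div>http://113.86.204.95:9999/udp/239.77.0.1:5146</div>
  <div>2024-05-07 12:00:00•1920x1080</div>
</div>
"""

# Strip HTML comments before parsing, as the new visitPage code does.
source = re.sub(r"<!--.*?-->", "", page_source, flags=re.DOTALL)
soup = BeautifulSoup(source, "html.parser")

results = []
for element in soup.descendants:
    # Scan every text node for a URL instead of relying on a fixed "result" class.
    if not isinstance(element, NavigableString):
        continue
    match = re.search(URL_REGEX, str(element).strip())
    if not match:
        continue
    url = match.group()
    if any(item[0] == url for item in results):
        continue  # skip URLs already collected from this page
    # The date/resolution text sits in the sibling element right after the URL's element.
    url_element = soup.find(lambda tag: tag.get_text(strip=True) == url)
    date = resolution = None
    if url_element is not None and url_element.find_next_sibling() is not None:
        info_text = url_element.find_next_sibling().get_text(strip=True)
        date = info_text.partition(" ")[0] or None
        resolution = info_text.partition(" ")[2].partition("•")[2] or None
    results.append((url, date, resolution))

print(results)
# [('http://113.86.204.95:9999/udp/239.77.0.1:5146', '2024-05-07', '1920x1080')]
```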
51 changes: 29 additions & 22 deletions utils.py
@@ -91,7 +91,8 @@ async def getChannelsByExtendBaseUrls(channel_names):
url = re.match(pattern, line).group(2)
value = (url, None, resolution)
if key in link_dict:
link_dict[key].append(value)
if value not in link_dict[key]:
link_dict[key].append(value)
else:
link_dict[key] = [value]
found_channels = []
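As a small illustration of the de-duplication guard added above (the key and tuple values here are assumed), a value is only appended when that exact (url, date, resolution) tuple is not already stored under the channel name:

```python
# Illustrative only: the dedup-on-append guard used when collecting channel URLs.
link_dict = {}


def add_channel(key, value):
    """Store value under key, skipping exact duplicate tuples."""
    if key in link_dict:
        if value not in link_dict[key]:
            link_dict[key].append(value)
    else:
        link_dict[key] = [value]


add_channel("广东珠江", ("http://113.86.204.95:9999/udp/239.77.0.1:5146", None, None))
add_channel("广东珠江", ("http://113.86.204.95:9999/udp/239.77.0.1:5146", None, None))  # duplicate, ignored
print(link_dict)  # one channel key with a single URL tuple
```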
@@ -137,31 +138,37 @@ def updateFile(final_file, old_file):
os.replace(old_file, final_file)


def getUrlInfo(result):
def getChannelUrl(element):
"""
Get the url, date and resolution
"""
url = date = resolution = None
result_div = [div for div in result.children if div.name == "div"]
if 1 < len(result_div):
channel_text = result_div[1].get_text(strip=True)
url_match = re.search(
r"http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+",
channel_text,
url = None
urlRegex = r"http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+"
url_search = re.search(
urlRegex,
element.get_text(strip=True),
)
if url_search:
url = url_search.group()
return url


def getChannelInfo(element):
"""
Get the channel info
"""
date, resolution = None, None
info_text = element.get_text(strip=True)
if info_text:
date, resolution = (
(info_text.partition(" ")[0] if info_text.partition(" ")[0] else None),
(
info_text.partition(" ")[2].partition("•")[2]
if info_text.partition(" ")[2].partition("•")[2]
else None
),
)
if url_match is not None:
url = url_match.group()
info_text = result_div[-1].get_text(strip=True)
if info_text:
date, resolution = (
(info_text.partition(" ")[0] if info_text.partition(" ")[0] else None),
(
info_text.partition(" ")[2].partition("•")[2]
if info_text.partition(" ")[2].partition("•")[2]
else None
),
)
return url, date, resolution
return date, resolution


async def getSpeed(url, urlTimeout=5):
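A short, hypothetical usage of the two new helpers (the tag contents below are assumed): getChannelUrl extracts a URL from an element's text, and getChannelInfo splits an info string of the form "date time•resolution" into its date and resolution parts.

```python
# Hypothetical usage of the new helpers; tag contents are assumed.
from bs4 import BeautifulSoup

from utils import getChannelInfo, getChannelUrl

soup = BeautifulSoup(
    "<div>http://113.86.204.95:9999/udp/239.77.0.1:5146</div>"
    "<div>2024-05-07 12:00:00•1920x1080</div>",
    "html.parser",
)
url_div, info_div = soup.find_all("div")

print(getChannelUrl(url_div))    # http://113.86.204.95:9999/udp/239.77.0.1:5146
print(getChannelInfo(info_div))  # ('2024-05-07', '1920x1080')
```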
2 changes: 1 addition & 1 deletion version.json
@@ -1,3 +1,3 @@
{
"version": "1.1.0"
"version": "1.1.2"
}
