Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update #109

Merged
merged 5 commits into from
May 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ jobs:
steps:
- name: Set branch name
id: vars
run: echo ::set-output name=branch::${{ github.repository_owner == 'Guovin' && 'gd' || 'master' }}
# Step outputs must be appended to $GITHUB_OUTPUT; values written to
# $GITHUB_ENV become environment variables and would leave
# steps.vars.outputs.branch (used as the checkout ref below) empty.
run: echo "branch=${{ github.repository_owner == 'Guovin' && 'gd' || 'master' }}" >> "$GITHUB_OUTPUT"
- uses: actions/checkout@v3
with:
ref: ${{ steps.vars.outputs.branch }}
Expand Down
8 changes: 8 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,13 @@
# 更新日志(Changelog)

## v1.1.3

### 2024/5/8

- 优化频道接口不对应问题(#99)(Improve handling of mismatches between channels and their interfaces (#99))
- 处理 tqdm 安全问题(Address the tqdm security issue)
- 修改即将被废弃的命令(Replace commands that are about to be deprecated)

## v1.1.2

### 2024/5/7
Expand Down
2 changes: 1 addition & 1 deletion Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ selenium = "4.19.0"
selenium-stealth = "1.0.6"
aiohttp = ">=3.9.4"
bs4 = "0.0.2"
tqdm = "4.66.2"
tqdm = ">=4.66.3"
async-timeout = "4.0.3"

[requires]
Expand Down
8 changes: 4 additions & 4 deletions Pipfile.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

25 changes: 3 additions & 22 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,12 @@
from selenium.webdriver.support import expected_conditions as EC
from selenium_stealth import stealth
import asyncio
from bs4 import BeautifulSoup, NavigableString
from bs4 import BeautifulSoup
from utils import (
getChannelItems,
updateChannelUrlsTxt,
updateFile,
getChannelUrl,
getChannelInfo,
getResultsFromSoup,
sortUrlsBySpeedAndResolution,
getTotalUrls,
filterUrlsByPatterns,
Expand Down Expand Up @@ -124,25 +123,7 @@ async def visitPage(self, channelItems):
)
soup = BeautifulSoup(source, "html.parser")
if soup:
results = []
for element in soup.descendants:
if isinstance(element, NavigableString):
url = getChannelUrl(element)
if url and not any(
item[0] == url for item in results
):
url_element = soup.find(
lambda tag: tag.get_text(strip=True)
== url
)
if url_element:
info_element = (
url_element.find_next_sibling()
)
date, resolution = getChannelInfo(
info_element
)
results.append((url, date, resolution))
results = getResultsFromSoup(soup, name)
for result in results:
url, date, resolution = result
if url and checkUrlByPatterns(url):
Expand Down
56 changes: 46 additions & 10 deletions utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from urllib.parse import urlparse
import requests
import re
from bs4 import NavigableString


def getChannelItems():
Expand Down Expand Up @@ -44,16 +45,12 @@ def getChannelItems():
# This is a url, add it to the list of urls for the current channel.
match = re.search(pattern, line)
if match is not None:
if match.group(1) not in channels[current_category]:
channels[current_category][match.group(1)] = [match.group(2)]
elif (
match.group(2)
and match.group(2)
not in channels[current_category][match.group(1)]
):
channels[current_category][match.group(1)].append(
match.group(2)
)
name = match.group(1).strip()
url = match.group(2).strip()
if name not in channels[current_category]:
channels[current_category][name] = [url]
elif url and url not in channels[current_category][name]:
channels[current_category][name].append(url)
return channels
finally:
f.close()
Expand Down Expand Up @@ -171,6 +168,45 @@ def getChannelInfo(element):
return date, resolution


def checkNameMatch(name, result_name):
    """
    Decide whether a scraped result name is acceptable for the wanted channel.

    If result_name contains a run of ASCII letters followed by "_", "-" or
    "+", or contains "cctv" (case-insensitive), it must equal name when both
    are lower-cased. Any other result_name is accepted unconditionally.

    Returns True when the result may be kept, False otherwise.
    """
    pattern = r"[a-zA-Z]+[_\-+]|cctv"
    # Names that do not match the pattern are not checked at all.
    if not re.search(pattern, result_name, re.IGNORECASE):
        return True
    # No debug printing here: this runs once per candidate result, and the
    # output only repeated the comparison made on the next line.
    return name.lower() == result_name.lower()


def getResultsFromSoup(soup, name):
    """
    Collect (url, date, resolution) tuples from the parsed page for a channel.

    Every text node in soup is passed to getChannelUrl. For each truthy url
    not already collected, the first tag whose stripped text equals the url
    is located. Its previous sibling supplies the channel name, which is
    checked against name with checkNameMatch; its next sibling is passed to
    getChannelInfo to obtain the date and resolution.
    """
    collected = []
    for node in soup.descendants:
        if not isinstance(node, NavigableString):
            continue
        url = getChannelUrl(node)
        if not url:
            continue
        # Skip urls that have already produced a result.
        if any(entry[0] == url for entry in collected):
            continue
        url_tag = soup.find(lambda tag: tag.get_text(strip=True) == url)
        if not url_tag:
            continue
        name_tag = url_tag.find_previous_sibling()
        if not name_tag:
            continue
        if not checkNameMatch(name, name_tag.get_text(strip=True)):
            continue
        date, resolution = getChannelInfo(url_tag.find_next_sibling())
        collected.append((url, date, resolution))
    return collected


async def getSpeed(url, urlTimeout=5):
"""
Get the speed of the url
Expand Down
2 changes: 1 addition & 1 deletion version.json
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
{
"version": "1.1.2"
"version": "1.1.3"
}
Loading