Commit
Merge branch 'refs/heads/master' into gd
# Conflicts:
#	result.log
#	result.txt
Guovin committed May 7, 2024
2 parents 14a80e8 + 0b4518c commit a531fd3
Showing 6 changed files with 77 additions and 36 deletions.
1 change: 0 additions & 1 deletion .gitignore
@@ -1 +0,0 @@
*.log
14 changes: 14 additions & 0 deletions CHANGELOG.md
@@ -1,5 +1,19 @@
# Changelog

## v1.1.2

### 2024/5/7

- Refactored the interface fetching method, improving its generality and adapting to structural changes
- Fixed the automatic update issue on the gd branch (#105)
- Optimized fetching of custom interface sources and de-duplicated interfaces

## v1.1.1

### 2024/4/29

- To avoid code merge conflicts, the master branch no longer runs the update workflow; master is now used only for releasing new features. If you are using my link, please switch to the gd branch

## v1.1.0

### 2024/4/26
2 changes: 1 addition & 1 deletion demo.txt
@@ -1,4 +1,4 @@
广东频道,#genre#
广东频道,#genre#
广东珠江,http://113.86.204.95:9999/udp/239.77.0.1:5146
开平综合,http://php.jdshipin.com:8880/chudian.php?id=kpzh
开平生活,http://php.jdshipin.com:8880/chudian.php?id=kpsh
43 changes: 32 additions & 11 deletions main.py
@@ -8,12 +8,13 @@
from selenium.webdriver.support import expected_conditions as EC
from selenium_stealth import stealth
import asyncio
from bs4 import BeautifulSoup
from bs4 import BeautifulSoup, NavigableString
from utils import (
getChannelItems,
updateChannelUrlsTxt,
updateFile,
getUrlInfo,
getChannelUrl,
getChannelInfo,
sortUrlsBySpeedAndResolution,
getTotalUrls,
filterUrlsByPatterns,
Expand All @@ -25,6 +26,7 @@
from logging.handlers import RotatingFileHandler
import os
from tqdm import tqdm
import re

handler = RotatingFileHandler("result_new.log", encoding="utf-8")
logging.basicConfig(
@@ -114,18 +116,37 @@ async def visitPage(self, channelItems):
self.driver.execute_script(
"arguments[0].click();", page_link
)
soup = BeautifulSoup(self.driver.page_source, "html.parser")
results = (
soup.find_all("div", class_="result") if soup else []
source = re.sub(
r"<!--.*?-->",
"",
self.driver.page_source,
flags=re.DOTALL,
)
for result in results:
try:
url, date, resolution = getUrlInfo(result)
soup = BeautifulSoup(source, "html.parser")
if soup:
results = []
for element in soup.descendants:
if isinstance(element, NavigableString):
url = getChannelUrl(element)
if url and not any(
item[0] == url for item in results
):
url_element = soup.find(
lambda tag: tag.get_text(strip=True)
== url
)
if url_element:
info_element = (
url_element.find_next_sibling()
)
date, resolution = getChannelInfo(
info_element
)
results.append((url, date, resolution))
for result in results:
url, date, resolution = result
if url and checkUrlByPatterns(url):
infoList.append((url, date, resolution))
except Exception as e:
print(f"Error on result {result}: {e}")
continue
except Exception as e:
print(f"Error on page {page}: {e}")
continue
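For orientation, here is a minimal, self-contained sketch of the parsing flow this hunk introduces (the sample HTML is assumed for illustration, not taken from a real result page): HTML comments are stripped first, every text node is scanned for a channel URL, duplicates are skipped, and the date/resolution are read from the element that follows the one holding the URL.

```python
import re

from bs4 import BeautifulSoup, NavigableString

URL_REGEX = (
    r"http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]"
    r"|(?:%[0-9a-fA-F][0-9a-fA-F]))+"
)

# Assumed sample markup; the real page structure may differ.
page_source = """
<!-- decoy markup inside a comment is ignored -->
<div class="result">
  <div>广东珠江</div>
  <div>http://113.86.204.95:9999/udp/239.77.0.1:5146</div>
  <div>2024-05-07 12:00:00•1920x1080</div>
</div>
"""

# Strip HTML comments before parsing, as the new visitPage code does.
source = re.sub(r"<!--.*?-->", "", page_source, flags=re.DOTALL)
soup = BeautifulSoup(source, "html.parser")

results = []
for element in soup.descendants:
    # Scan every text node for a URL instead of relying on a fixed "result" class.
    if not isinstance(element, NavigableString):
        continue
    match = re.search(URL_REGEX, str(element).strip())
    if not match:
        continue
    url = match.group()
    if any(item[0] == url for item in results):
        continue  # skip URLs already collected from this page
    # The date/resolution text sits in the sibling element right after the URL's element.
    url_element = soup.find(lambda tag: tag.get_text(strip=True) == url)
    date = resolution = None
    if url_element is not None and url_element.find_next_sibling() is not None:
        info_text = url_element.find_next_sibling().get_text(strip=True)
        date = info_text.partition(" ")[0] or None
        resolution = info_text.partition(" ")[2].partition("•")[2] or None
    results.append((url, date, resolution))

print(results)
# [('http://113.86.204.95:9999/udp/239.77.0.1:5146', '2024-05-07', '1920x1080')]
```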
51 changes: 29 additions & 22 deletions utils.py
@@ -91,7 +91,8 @@ async def getChannelsByExtendBaseUrls(channel_names):
url = re.match(pattern, line).group(2)
value = (url, None, resolution)
if key in link_dict:
link_dict[key].append(value)
if value not in link_dict[key]:
link_dict[key].append(value)
else:
link_dict[key] = [value]
found_channels = []
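As a small illustration of the de-duplication guard added above (the key and tuple values here are assumed), a value is only appended when that exact (url, date, resolution) tuple is not already stored under the channel name:

```python
# Illustrative only: the dedup-on-append guard used when collecting channel URLs.
link_dict = {}


def add_channel(key, value):
    """Store value under key, skipping exact duplicate tuples."""
    if key in link_dict:
        if value not in link_dict[key]:
            link_dict[key].append(value)
    else:
        link_dict[key] = [value]


add_channel("广东珠江", ("http://113.86.204.95:9999/udp/239.77.0.1:5146", None, None))
add_channel("广东珠江", ("http://113.86.204.95:9999/udp/239.77.0.1:5146", None, None))  # duplicate, ignored
print(link_dict)  # one channel key with a single URL tuple
```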
@@ -137,31 +138,37 @@ def updateFile(final_file, old_file):
os.replace(old_file, final_file)


def getUrlInfo(result):
def getChannelUrl(element):
"""
Get the url, date and resolution
"""
url = date = resolution = None
result_div = [div for div in result.children if div.name == "div"]
if 1 < len(result_div):
channel_text = result_div[1].get_text(strip=True)
url_match = re.search(
r"http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+",
channel_text,
url = None
urlRegex = r"http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+"
url_search = re.search(
urlRegex,
element.get_text(strip=True),
)
if url_search:
url = url_search.group()
return url


def getChannelInfo(element):
"""
Get the channel info
"""
date, resolution = None, None
info_text = element.get_text(strip=True)
if info_text:
date, resolution = (
(info_text.partition(" ")[0] if info_text.partition(" ")[0] else None),
(
info_text.partition(" ")[2].partition("•")[2]
if info_text.partition(" ")[2].partition("•")[2]
else None
),
)
if url_match is not None:
url = url_match.group()
info_text = result_div[-1].get_text(strip=True)
if info_text:
date, resolution = (
(info_text.partition(" ")[0] if info_text.partition(" ")[0] else None),
(
info_text.partition(" ")[2].partition("•")[2]
if info_text.partition(" ")[2].partition("•")[2]
else None
),
)
return url, date, resolution
return date, resolution


async def getSpeed(url, urlTimeout=5):
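A short, hypothetical usage of the two new helpers (the tag contents below are assumed): getChannelUrl extracts a URL from an element's text, and getChannelInfo splits an info string of the form "date time•resolution" into its date and resolution parts.

```python
# Hypothetical usage of the new helpers; tag contents are assumed.
from bs4 import BeautifulSoup

from utils import getChannelInfo, getChannelUrl

soup = BeautifulSoup(
    "<div>http://113.86.204.95:9999/udp/239.77.0.1:5146</div>"
    "<div>2024-05-07 12:00:00•1920x1080</div>",
    "html.parser",
)
url_div, info_div = soup.find_all("div")

print(getChannelUrl(url_div))    # http://113.86.204.95:9999/udp/239.77.0.1:5146
print(getChannelInfo(info_div))  # ('2024-05-07', '1920x1080')
```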
2 changes: 1 addition & 1 deletion version.json
@@ -1,3 +1,3 @@
{
"version": "1.1.0"
"version": "1.1.2"
}
