From 5e410152bae2cd4303cc75c85cdfa56103c1d9f8 Mon Sep 17 00:00:00 2001
From: EXP <289065406@qq.com>
Date: Sat, 22 Oct 2022 13:34:02 +0800
Subject: [PATCH] Release: 2.0.0
---
README.md | 17 --------
gen_pdm.py | 2 +-
main.py | 83 +++++++++++++++---------------------
requirements.txt | 6 ++-
src/core/_base_crawler.py | 2 +-
src/core/demo_crawler.py | 2 +-
src/core/pager.py | 2 +-
src/utils/log.py | 90 ---------------------------------------
8 files changed, 42 insertions(+), 162 deletions(-)
delete mode 100644 src/utils/log.py
diff --git a/README.md b/README.md
index 22f6269..927cae9 100644
--- a/README.md
+++ b/README.md
@@ -18,20 +18,3 @@
5. 修改 [`autorun.yml`](./.github/workflows/autorun.yml),可通过 Github Actions 自动运行
6. 开启 Github Pages,指定目录为 master/docs
-
-## 赞助途径
-
-| 支付宝 | 微信 |
-|:---:|:---:|
-| ![](docs/imgs/alipay.png) | ![](docs/imgs/wechat.png) |
-
-
-## 版权声明
-
- [![Copyright (C) EXP,2016](https://img.shields.io/badge/Copyright%20(C)-EXP%202016-blue.svg)](http://exp-blog.com) [![License: GPL v3](https://img.shields.io/badge/License-GPL%20v3-blue.svg)](https://www.gnu.org/licenses/gpl-3.0)
-
-- Site: [http://exp-blog.com](http://exp-blog.com)
-- Mail: 289065406@qq.com
-
-
-------
diff --git a/gen_pdm.py b/gen_pdm.py
index 1767d33..afef8f0 100644
--- a/gen_pdm.py
+++ b/gen_pdm.py
@@ -9,7 +9,7 @@
from pypdm.dbc._sqlite import SqliteDBC
from pypdm.builder import build
from src import config
-from src.utils import log
+from color_log.clog import log
diff --git a/main.py b/main.py
index 956b05f..004e419 100644
--- a/main.py
+++ b/main.py
@@ -3,35 +3,48 @@
# @Author : EXP
# -----------------------------------------------
+import argparse
import sys
from pypdm.dbc._sqlite import SqliteDBC
from src.core.demo_crawler import DemoCrawler
from src import config
from src.core import pager
-from src.utils import log
-
-
-def help_info() :
- return '''
--h 查看帮助信息
--p 爬取页数,默认 10
--z 指定爬取地区
-'''
-
-
-def main(is_help, pages, zone) :
- if is_help :
- log.info(help_info())
- return
-
+from color_log.clog import log
+
+
+def args() :
+    """Parse the crawler's command-line options (-p/--pages, -z/--zone) and return the namespace."""
+    parser = argparse.ArgumentParser(
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        prog='Python 爬虫开发模板',
+        description='使用此模板可以快速搭建一个爬虫框架',
+        epilog='\n'.join([    # examples must only use options that are registered below
+            '示例: ',
+            '  使用默认参数:python main.py',
+            '  指定爬取页数:python main.py -p 5',
+            '  指定爬取地区:python main.py -z china',
+            '  同时指定页数和地区:python main.py -p 5 -z china',
+            '',
+            '(未指定参数时,默认爬取 10 页,地区为 china)'
+        ])
+    )
+    parser.add_argument('-p', '--pages', dest='pages', type=int, default=10, help='爬取页数')
+    parser.add_argument('-z', '--zone', dest='zone', type=str, default='china', help='爬取地区')
+    return parser.parse_args()
+
+
+
+def main(args) :
log.info('+++++++++++++++++++++++++++++++++++++++')
options = {
- 'pages': pages,
- 'zone': zone
+ # 爬虫参数,按需替换
+ # ... ...
+ 'pages': args.pages,
+ 'zone': args.zone
}
crawlers = [
DemoCrawler(options=options),
- # .... 其他爬虫的实现类
+ # ... ... 其他爬虫的实现类
]
all_cache_datas = []
@@ -47,7 +60,6 @@ def main(is_help, pages, zone) :
def init() :
- log.init()
sdbc = SqliteDBC(options=config.settings.database)
sdbc.conn()
sdbc.exec_script(config.settings.base['sqlpath'])
@@ -55,36 +67,9 @@ def init() :
-def sys_args(sys_args) :
- is_help = False
- pages = 10
- zone = 'CN'
-
- idx = 1
- size = len(sys_args)
- while idx < size :
- try :
- if sys_args[idx] == '-h' or sys_args[idx] == '--help' :
- is_help = True
- break
-
- elif sys_args[idx] == '-p' or sys_args[idx] == '--pages' :
- idx += 1
- pages = int(sys_args[idx])
-
- elif sys_args[idx] == '-z' or sys_args[idx] == '--zone' :
- idx += 1
- zone = sys_args[idx]
- except :
- pass
- idx += 1
- return [ is_help, pages, zone ]
-
-
-
if __name__ == "__main__" :
- init()
try :
- main(*sys_args(sys.argv))
+ init()
+ main(args())
except :
log.error('未知异常')
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index c8ef523..f865021 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,6 @@
+argparse>=1.4.0
requests==2.22.0
bs4==0.0.1
-pypdm-db==1.1.2
-pyyaml-erb==1.0.4
+py-color-log>=1.0.4
+pypdm-db>=1.1.2
+pyyaml-erb>=1.0.5
diff --git a/src/core/_base_crawler.py b/src/core/_base_crawler.py
index dc3cfeb..a5f156e 100644
--- a/src/core/_base_crawler.py
+++ b/src/core/_base_crawler.py
@@ -7,7 +7,7 @@
from abc import ABCMeta, abstractmethod # python不存在抽象类的概念, 需要引入abc模块实现
from src import config
-from src.utils import log
+from color_log.clog import log
from pypdm.dbc._sqlite import SqliteDBC
from src.dao.t_crawler import TCrawlerDao
diff --git a/src/core/demo_crawler.py b/src/core/demo_crawler.py
index 25d9ae8..1fac520 100644
--- a/src/core/demo_crawler.py
+++ b/src/core/demo_crawler.py
@@ -10,7 +10,7 @@
from bs4 import BeautifulSoup
from src.core._base_crawler import BaseCrawler
from src.bean.cache_info import CacheInfo
-from src.utils import log
+from color_log.clog import log
class DemoCrawler(BaseCrawler):
diff --git a/src/core/pager.py b/src/core/pager.py
index 3bb63f4..56c2f1f 100644
--- a/src/core/pager.py
+++ b/src/core/pager.py
@@ -10,7 +10,7 @@
from src.bean.t_crawler import TCrawler
from src.dao.t_crawler import TCrawlerDao
from src import config
-from src.utils import log
+from color_log.clog import log
HTML_HOME_PATH = '%s/docs/home.html' % config.PRJ_DIR
TPL_HOME_PATH = '%s/tpl/home.tpl' % config.PRJ_DIR
diff --git a/src/utils/log.py b/src/utils/log.py
deleted file mode 100644
index bf3c6ef..0000000
--- a/src/utils/log.py
+++ /dev/null
@@ -1,90 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-# @Author : EXP
-# -----------------------------------------------
-
-import os
-import traceback
-import logging
-from logging.handlers import TimedRotatingFileHandler
-from src.config import PRJ_DIR
-
-LOG_DIR = '%s/log' % PRJ_DIR
-RUN_LOG = '%s/run.log' % LOG_DIR
-ERR_LOG = '%s/err.log' % LOG_DIR
-
-
-def init():
- """
- 初始化日志配置 (只需在程序入口调用一次)
- :return: None
- """
- if not os.path.exists(LOG_DIR) :
- os.makedirs(LOG_DIR)
-
- # 全局配置
- logger = logging.getLogger()
- logger.setLevel("DEBUG")
- BASIC_FORMAT = "%(asctime)s [%(levelname)s] : %(message)s"
- DATE_FORMAT = "%Y-%m-%d %H:%M:%S"
- formatter = logging.Formatter(BASIC_FORMAT, DATE_FORMAT)
-
- # 输出到控制台的 handler
- ch = logging.StreamHandler()
- ch.setFormatter(formatter)
- ch.setLevel("DEBUG")
- logger.addHandler(ch)
-
- # 输出到运行日志文件的 handler
- fh = TimedRotatingFileHandler(filename=RUN_LOG, when="MIDNIGHT", interval=1, backupCount=7)
- fh.setFormatter(formatter)
- fh.setLevel("INFO")
- logger.addHandler(fh)
-
- # 输出到异常日志文件的 handler
- exfh = TimedRotatingFileHandler(filename=ERR_LOG, when="MIDNIGHT", interval=1, backupCount=7)
- exfh.setLevel("ERROR")
- exfh.setFormatter(formatter)
- logger.addHandler(exfh)
-
- # 禁用第三方日志
- logging.getLogger("requests").setLevel(logging.FATAL)
- logging.getLogger('chardet.charsetprober').setLevel(logging.FATAL)
-
-
-
-def debug(msg):
- """
- 打印调试信息
- :param msg: 日志信息
- :return: None
- """
- logging.debug(msg)
-
-
-def info(msg):
- """
- 打印正常信息
- :param msg: 日志信息
- :return: None
- """
- logging.info(msg)
-
-
-def warn(msg):
- """
- 打印警告信息
- :param msg: 日志信息
- :return: None
- """
- logging.warning(msg)
-
-
-def error(msg):
- """
- 打印异常信息和异常堆栈
- :param msg: 日志信息
- :return: None
- """
- logging.exception(msg)
- logging.exception(traceback.format_exc())