From 5e410152bae2cd4303cc75c85cdfa56103c1d9f8 Mon Sep 17 00:00:00 2001 From: EXP <289065406@qq.com> Date: Sat, 22 Oct 2022 13:34:02 +0800 Subject: [PATCH] Release: 2.0.0 --- README.md | 17 -------- gen_pdm.py | 2 +- main.py | 83 +++++++++++++++--------------------- requirements.txt | 6 ++- src/core/_base_crawler.py | 2 +- src/core/demo_crawler.py | 2 +- src/core/pager.py | 2 +- src/utils/log.py | 90 --------------------------------------- 8 files changed, 42 insertions(+), 162 deletions(-) delete mode 100644 src/utils/log.py diff --git a/README.md b/README.md index 22f6269..927cae9 100644 --- a/README.md +++ b/README.md @@ -18,20 +18,3 @@ 5. 修改 [`autorun.yml`](./.github/workflows/autorun.yml),可通过 Github Actions 自动运行 6. 开启 Github Pages,指定目录为 master/docs - -## 赞助途径 - -| 支付宝 | 微信 | -|:---:|:---:| -| ![](docs/imgs/alipay.png) | ![](docs/imgs/wechat.png) | - - -## 版权声明 - - [![Copyright (C) EXP,2016](https://img.shields.io/badge/Copyright%20(C)-EXP%202016-blue.svg)](http://exp-blog.com) [![License: GPL v3](https://img.shields.io/badge/License-GPL%20v3-blue.svg)](https://www.gnu.org/licenses/gpl-3.0) - -- Site: [http://exp-blog.com](http://exp-blog.com) -- Mail: 289065406@qq.com - - ------- diff --git a/gen_pdm.py b/gen_pdm.py index 1767d33..afef8f0 100644 --- a/gen_pdm.py +++ b/gen_pdm.py @@ -9,7 +9,7 @@ from pypdm.dbc._sqlite import SqliteDBC from pypdm.builder import build from src import config -from src.utils import log +from color_log.clog import log diff --git a/main.py b/main.py index 956b05f..004e419 100644 --- a/main.py +++ b/main.py @@ -3,35 +3,48 @@ # @Author : EXP # ----------------------------------------------- +import argparse import sys from pypdm.dbc._sqlite import SqliteDBC from src.core.demo_crawler import DemoCrawler from src import config from src.core import pager -from src.utils import log - - -def help_info() : - return ''' --h 查看帮助信息 --p 爬取页数,默认 10 --z 指定爬取地区 -''' - - -def main(is_help, pages, zone) : - if is_help : - log.info(help_info()) - return - +from color_log.clog import log + + +def args() : + parser = argparse.ArgumentParser( + formatter_class=argparse.RawDescriptionHelpFormatter, + prog='Python 爬虫开发模板', + description='使用此模板可以快速搭建一个爬虫框架', + epilog='\r\n'.join([ + '示例: ', + ' 单机连续帧识别模式:python main.py', + ' 单机连续帧模式:python main.py -m alone -f', + ' 单机截屏识别模式:python main.py -m alone', + ' 联机模式:python main.py -m duplex -r ai', + ' 联机模式:python main.py -m duplex -r ctrl', + '', + '(单机模式只支持【无边框全屏】或【窗口】,联机模式只支持【无边框全屏】或【全屏】模式)' + ]) + ) + parser.add_argument('-p', '--pages', dest='pages', type=int, default=10, help='爬取页数') + parser.add_argument('-z', '--zone', dest='zone', type=str, default='china', help='爬取地区') + return parser.parse_args() + + + +def main(args) : log.info('+++++++++++++++++++++++++++++++++++++++') options = { - 'pages': pages, - 'zone': zone + # 爬虫参数,按需替换 + # ... ... + 'pages': args.pages, + 'zone': args.zone } crawlers = [ DemoCrawler(options=options), - # .... 其他爬虫的实现类 + # ... ... 其他爬虫的实现类 ] all_cache_datas = [] @@ -47,7 +60,6 @@ def main(is_help, pages, zone) : def init() : - log.init() sdbc = SqliteDBC(options=config.settings.database) sdbc.conn() sdbc.exec_script(config.settings.base['sqlpath']) @@ -55,36 +67,9 @@ def init() : -def sys_args(sys_args) : - is_help = False - pages = 10 - zone = 'CN' - - idx = 1 - size = len(sys_args) - while idx < size : - try : - if sys_args[idx] == '-h' or sys_args[idx] == '--help' : - is_help = True - break - - elif sys_args[idx] == '-p' or sys_args[idx] == '--pages' : - idx += 1 - pages = int(sys_args[idx]) - - elif sys_args[idx] == '-z' or sys_args[idx] == '--zone' : - idx += 1 - zone = sys_args[idx] - except : - pass - idx += 1 - return [ is_help, pages, zone ] - - - if __name__ == "__main__" : - init() try : - main(*sys_args(sys.argv)) + init() + main(args()) except : log.error('未知异常') \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index c8ef523..f865021 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,6 @@ +argparse>=1.4.0 requests==2.22.0 bs4==0.0.1 -pypdm-db==1.1.2 -pyyaml-erb==1.0.4 +py-color-log>=1.0.4 +pypdm-db>=1.1.2 +pyyaml-erb>=1.0.5 diff --git a/src/core/_base_crawler.py b/src/core/_base_crawler.py index dc3cfeb..a5f156e 100644 --- a/src/core/_base_crawler.py +++ b/src/core/_base_crawler.py @@ -7,7 +7,7 @@ from abc import ABCMeta, abstractmethod # python不存在抽象类的概念, 需要引入abc模块实现 from src import config -from src.utils import log +from color_log.clog import log from pypdm.dbc._sqlite import SqliteDBC from src.dao.t_crawler import TCrawlerDao diff --git a/src/core/demo_crawler.py b/src/core/demo_crawler.py index 25d9ae8..1fac520 100644 --- a/src/core/demo_crawler.py +++ b/src/core/demo_crawler.py @@ -10,7 +10,7 @@ from bs4 import BeautifulSoup from src.core._base_crawler import BaseCrawler from src.bean.cache_info import CacheInfo -from src.utils import log +from color_log.clog import log class DemoCrawler(BaseCrawler): diff --git a/src/core/pager.py b/src/core/pager.py index 3bb63f4..56c2f1f 100644 --- a/src/core/pager.py +++ b/src/core/pager.py @@ -10,7 +10,7 @@ from src.bean.t_crawler import TCrawler from src.dao.t_crawler import TCrawlerDao from src import config -from src.utils import log +from color_log.clog import log HTML_HOME_PATH = '%s/docs/home.html' % config.PRJ_DIR TPL_HOME_PATH = '%s/tpl/home.tpl' % config.PRJ_DIR diff --git a/src/utils/log.py b/src/utils/log.py deleted file mode 100644 index bf3c6ef..0000000 --- a/src/utils/log.py +++ /dev/null @@ -1,90 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# @Author : EXP -# ----------------------------------------------- - -import os -import traceback -import logging -from logging.handlers import TimedRotatingFileHandler -from src.config import PRJ_DIR - -LOG_DIR = '%s/log' % PRJ_DIR -RUN_LOG = '%s/run.log' % LOG_DIR -ERR_LOG = '%s/err.log' % LOG_DIR - - -def init(): - """ - 初始化日志配置 (只需在程序入口调用一次) - :return: None - """ - if not os.path.exists(LOG_DIR) : - os.makedirs(LOG_DIR) - - # 全局配置 - logger = logging.getLogger() - logger.setLevel("DEBUG") - BASIC_FORMAT = "%(asctime)s [%(levelname)s] : %(message)s" - DATE_FORMAT = "%Y-%m-%d %H:%M:%S" - formatter = logging.Formatter(BASIC_FORMAT, DATE_FORMAT) - - # 输出到控制台的 handler - ch = logging.StreamHandler() - ch.setFormatter(formatter) - ch.setLevel("DEBUG") - logger.addHandler(ch) - - # 输出到运行日志文件的 handler - fh = TimedRotatingFileHandler(filename=RUN_LOG, when="MIDNIGHT", interval=1, backupCount=7) - fh.setFormatter(formatter) - fh.setLevel("INFO") - logger.addHandler(fh) - - # 输出到异常日志文件的 handler - exfh = TimedRotatingFileHandler(filename=ERR_LOG, when="MIDNIGHT", interval=1, backupCount=7) - exfh.setLevel("ERROR") - exfh.setFormatter(formatter) - logger.addHandler(exfh) - - # 禁用第三方日志 - logging.getLogger("requests").setLevel(logging.FATAL) - logging.getLogger('chardet.charsetprober').setLevel(logging.FATAL) - - - -def debug(msg): - """ - 打印调试信息 - :param msg: 日志信息 - :return: None - """ - logging.debug(msg) - - -def info(msg): - """ - 打印正常信息 - :param msg: 日志信息 - :return: None - """ - logging.info(msg) - - -def warn(msg): - """ - 打印警告信息 - :param msg: 日志信息 - :return: None - """ - logging.warning(msg) - - -def error(msg): - """ - 打印异常信息和异常堆栈 - :param msg: 日志信息 - :return: None - """ - logging.exception(msg) - logging.exception(traceback.format_exc())