From 4890e30f0703f60217646e45555d163b4298e8a4 Mon Sep 17 00:00:00 2001
From: Marvin Zhang
Date: Sat, 22 Jun 2019 11:43:11 +0800
Subject: [PATCH 01/12] updated requirements.txt

---
 crawlab/requirements.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/crawlab/requirements.txt b/crawlab/requirements.txt
index fed8fb801..67c8d0db6 100644
--- a/crawlab/requirements.txt
+++ b/crawlab/requirements.txt
@@ -13,3 +13,4 @@ Werkzeug==0.15.2
 eventlet
 Celery
 Flower
+redis

From 91b5614a4bd728614a709f7b512f15c67a4505a6 Mon Sep 17 00:00:00 2001
From: Marvin Zhang
Date: Sat, 22 Jun 2019 11:44:22 +0800
Subject: [PATCH 02/12] updated Jenkinsfile

---
 Jenkinsfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index 182c419c6..29cabbf9f 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -38,7 +38,7 @@ pipeline {
                 echo 'Deploying....'
                 sh """
                 docker stop crawlab | true
-                docker run -d --rm --restart always --name crawlab \
+                docker run -d --rm --name crawlab \
                 -p 8080:8080 \
                 -p 8000:8000 \
                 -v /home/yeqing/.env.production:/opt/crawlab/frontend/.env.production \

From 1b2b7a486bee3374b2424f4e180557ef9941fa70 Mon Sep 17 00:00:00 2001
From: Marvin Zhang
Date: Sat, 22 Jun 2019 11:45:20 +0800
Subject: [PATCH 03/12] updated Dockerfile

---
 Dockerfile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 5622fb0dd..3b07591da 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -35,8 +35,8 @@ RUN npm install -g yarn \
     && yarn install
 
 # install backend
-RUN pip install -U setuptools \
-    && pip install -r /opt/crawlab/crawlab/requirements.txt
+RUN pip install -U setuptools -i https://pypi.tuna.tsinghua.edu.cn/simple \
+    && pip install -r /opt/crawlab/crawlab/requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple
 
 # start backend
 EXPOSE 8080

From a37860e7707c577fc05feb21e72d81e9bdbec7dc Mon Sep 17 00:00:00 2001
From: Marvin Zhang
Date: Sat, 22 Jun 2019 11:57:06 +0800
Subject: [PATCH 04/12] updated Jenkinsfile

---
 Jenkinsfile | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index 29cabbf9f..3ab1b5271 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -42,7 +42,8 @@ pipeline {
                 -p 8080:8080 \
                 -p 8000:8000 \
                 -v /home/yeqing/.env.production:/opt/crawlab/frontend/.env.production \
-                -v /home/yeqing/config.py:/opt/crawlab/crawlab/config/config.py
+                -v /home/yeqing/config.py:/opt/crawlab/crawlab/config/config.py \
+                tikazyq/crawlab master
                 """
             }
         }

From 5f508b46f8a09ec67df2ca480cf90a3039f23012 Mon Sep 17 00:00:00 2001
From: Marvin Zhang
Date: Sat, 22 Jun 2019 15:03:31 +0800
Subject: [PATCH 05/12] updated Jenkinsfile

---
 Jenkinsfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index 3ab1b5271..9c893e51b 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -24,7 +24,7 @@ pipeline {
             steps {
                 echo "Building..."
                 sh """
-                docker build -t crawlab:latest .
+                docker build -t tikazyq/crawlab:latest .
                 """
             }
         }

From 016363c340910d444d83640304c843e5d1d4b578 Mon Sep 17 00:00:00 2001
From: Marvin Zhang
Date: Sat, 22 Jun 2019 15:19:40 +0800
Subject: [PATCH 06/12] updated Jenkinsfile

---
 CHANGELOG.md                                  |  1 -
 Jenkinsfile                                   |  2 +-
 spiders/example_juejin/juejin/__init__.py     |  0
 spiders/example_juejin/juejin/items.py        | 17 ----
 spiders/example_juejin/juejin/middlewares.py  | 56 ------------
 spiders/example_juejin/juejin/pipelines.py    | 25 ------
 spiders/example_juejin/juejin/settings.py     | 89 -------------------
 .../example_juejin/juejin/spiders/__init__.py |  4 -
 .../juejin/spiders/juejin_spider.py           | 17 ----
 spiders/example_juejin/scrapy.cfg             | 11 ---
 spiders/example_juejin/start.py               |  2 -
 11 files changed, 1 insertion(+), 223 deletions(-)
 delete mode 100644 spiders/example_juejin/juejin/__init__.py
 delete mode 100644 spiders/example_juejin/juejin/items.py
 delete mode 100644 spiders/example_juejin/juejin/middlewares.py
 delete mode 100644 spiders/example_juejin/juejin/pipelines.py
 delete mode 100644 spiders/example_juejin/juejin/settings.py
 delete mode 100644 spiders/example_juejin/juejin/spiders/__init__.py
 delete mode 100644 spiders/example_juejin/juejin/spiders/juejin_spider.py
 delete mode 100644 spiders/example_juejin/scrapy.cfg
 delete mode 100644 spiders/example_juejin/start.py

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 70cd6e83d..7c81377a7 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,7 +2,6 @@
 ### Features / Enhancement
 - **Documentation**: Better and much more detailed documentation.
 - **Better Crontab**: Make crontab expression through crontab UI.
-- **High Concurrency**: `gevent` + `flask` to support high concurrency. [#70](https://github.com/tikazyq/crawlab/issues/70)
 
 ### Bugs Fixes
 - **Deleting Spider**. Deleting a spider does not only remove record in db but also removing related folder, tasks and schedules. [#69](https://github.com/tikazyq/crawlab/issues/69)

diff --git a/Jenkinsfile b/Jenkinsfile
index 9c893e51b..b22488cc1 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -37,7 +37,7 @@ pipeline {
             steps {
                 echo 'Deploying....'
                 sh """
-                docker stop crawlab | true
+                docker rm -f crawlab | true
                 docker run -d --rm --name crawlab \
                 -p 8080:8080 \
                 -p 8000:8000 \

diff --git a/spiders/example_juejin/juejin/__init__.py b/spiders/example_juejin/juejin/__init__.py
deleted file mode 100644
index e69de29bb..000000000

diff --git a/spiders/example_juejin/juejin/items.py b/spiders/example_juejin/juejin/items.py
deleted file mode 100644
index 2c4717dd9..000000000
--- a/spiders/example_juejin/juejin/items.py
+++ /dev/null
@@ -1,17 +0,0 @@
-# -*- coding: utf-8 -*-
-
-# Define here the models for your scraped items
-#
-# See documentation in:
-# http://doc.scrapy.org/en/latest/topics/items.html
-
-import scrapy
-
-
-class JuejinItem(scrapy.Item):
-    # define the fields for your item here like:
-    _id = scrapy.Field()
-    title = scrapy.Field()
-    link = scrapy.Field()
-    like = scrapy.Field()
-    task_id = scrapy.Field()

diff --git a/spiders/example_juejin/juejin/middlewares.py b/spiders/example_juejin/juejin/middlewares.py
deleted file mode 100644
index 9d5225a20..000000000
--- a/spiders/example_juejin/juejin/middlewares.py
+++ /dev/null
@@ -1,56 +0,0 @@
-# -*- coding: utf-8 -*-
-
-# Define here the models for your spider middleware
-#
-# See documentation in:
-# http://doc.scrapy.org/en/latest/topics/spider-middleware.html
-
-from scrapy import signals
-
-
-class JuejinSpiderMiddleware(object):
-    # Not all methods need to be defined. If a method is not defined,
-    # scrapy acts as if the spider middleware does not modify the
-    # passed objects.
-
-    @classmethod
-    def from_crawler(cls, crawler):
-        # This method is used by Scrapy to create your spiders.
-        s = cls()
-        crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
-        return s
-
-    def process_spider_input(self, response, spider):
-        # Called for each response that goes through the spider
-        # middleware and into the spider.
-
-        # Should return None or raise an exception.
-        return None
-
-    def process_spider_output(self, response, result, spider):
-        # Called with the results returned from the Spider, after
-        # it has processed the response.
-
-        # Must return an iterable of Request, dict or Item objects.
-        for i in result:
-            yield i
-
-    def process_spider_exception(self, response, exception, spider):
-        # Called when a spider or process_spider_input() method
-        # (from other spider middleware) raises an exception.
-
-        # Should return either None or an iterable of Response, dict
-        # or Item objects.
-        pass
-
-    def process_start_requests(self, start_requests, spider):
-        # Called with the start requests of the spider, and works
-        # similarly to the process_spider_output() method, except
-        # that it doesn’t have a response associated.
-
-        # Must return only requests (not items).
-        for r in start_requests:
-            yield r
-
-    def spider_opened(self, spider):
-        spider.logger.info('Spider opened: %s' % spider.name)

diff --git a/spiders/example_juejin/juejin/pipelines.py b/spiders/example_juejin/juejin/pipelines.py
deleted file mode 100644
index 1c4ffdc17..000000000
--- a/spiders/example_juejin/juejin/pipelines.py
+++ /dev/null
@@ -1,25 +0,0 @@
-# -*- coding: utf-8 -*-
-
-# Define your item pipelines here
-#
-# Don't forget to add your pipeline to the ITEM_PIPELINES setting
-# See: http://doc.scrapy.org/en/latest/topics/item-pipeline.html
-import os
-
-from pymongo import MongoClient
-
-MONGO_HOST = os.environ['MONGO_HOST']
-MONGO_PORT = int(os.environ['MONGO_PORT'])
-MONGO_DB = os.environ['MONGO_DB']
-
-
-class JuejinPipeline(object):
-    mongo = MongoClient(host=MONGO_HOST, port=MONGO_PORT)
-    db = mongo[MONGO_DB]
-    col_name = os.environ.get('CRAWLAB_COLLECTION','test')
-    col = db[col_name]
-
-    def process_item(self, item, spider):
-        item['task_id'] = os.environ.get('CRAWLAB_TASK_ID')
-        self.col.save(item)
-        return item

diff --git a/spiders/example_juejin/juejin/settings.py b/spiders/example_juejin/juejin/settings.py
deleted file mode 100644
index 44f8866cf..000000000
--- a/spiders/example_juejin/juejin/settings.py
+++ /dev/null
@@ -1,89 +0,0 @@
-# -*- coding: utf-8 -*-
-
-# Scrapy settings for juejin project
-#
-# For simplicity, this file contains only settings considered important or
-# commonly used. You can find more settings consulting the documentation:
-#
-# http://doc.scrapy.org/en/latest/topics/settings.html
-# http://scrapy.readthedocs.org/en/latest/topics/downloader-middleware.html
-# http://scrapy.readthedocs.org/en/latest/topics/spider-middleware.html
-
-BOT_NAME = 'juejin'
-
-SPIDER_MODULES = ['juejin.spiders']
-NEWSPIDER_MODULE = 'juejin.spiders'
-
-# Crawl responsibly by identifying yourself (and your website) on the user-agent
-USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36'
-
-# Obey robots.txt rules
-ROBOTSTXT_OBEY = True
-
-# Configure maximum concurrent requests performed by Scrapy (default: 16)
-# CONCURRENT_REQUESTS = 32
-
-# Configure a delay for requests for the same website (default: 0)
-# See http://scrapy.readthedocs.org/en/latest/topics/settings.html#download-delay
-# See also autothrottle settings and docs
-# DOWNLOAD_DELAY = 3
-# The download delay setting will honor only one of:
-# CONCURRENT_REQUESTS_PER_DOMAIN = 16
-# CONCURRENT_REQUESTS_PER_IP = 16
-
-# Disable cookies (enabled by default)
-# COOKIES_ENABLED = False
-
-# Disable Telnet Console (enabled by default)
-# TELNETCONSOLE_ENABLED = False
-
-# Override the default request headers:
-# DEFAULT_REQUEST_HEADERS = {
-#   'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
-#   'Accept-Language': 'en',
-# }
-
-# Enable or disable spider middlewares
-# See http://scrapy.readthedocs.org/en/latest/topics/spider-middleware.html
-# SPIDER_MIDDLEWARES = {
-#    'juejin.middlewares.JuejinSpiderMiddleware': 543,
-# }
-
-# Enable or disable downloader middlewares
-# See http://scrapy.readthedocs.org/en/latest/topics/downloader-middleware.html
-# DOWNLOADER_MIDDLEWARES = {
-#    'juejin.middlewares.MyCustomDownloaderMiddleware': 543,
-# }
-
-# Enable or disable extensions
-# See http://scrapy.readthedocs.org/en/latest/topics/extensions.html
-# EXTENSIONS = {
-#    'scrapy.extensions.telnet.TelnetConsole': None,
-# }
-
-# Configure item pipelines
-# See http://scrapy.readthedocs.org/en/latest/topics/item-pipeline.html
-ITEM_PIPELINES = {
-    'juejin.pipelines.JuejinPipeline': 300,
-}
-
-# Enable and configure the AutoThrottle extension (disabled by default)
-# See http://doc.scrapy.org/en/latest/topics/autothrottle.html
-# AUTOTHROTTLE_ENABLED = True
-# The initial download delay
-# AUTOTHROTTLE_START_DELAY = 5
-# The maximum download delay to be set in case of high latencies
-# AUTOTHROTTLE_MAX_DELAY = 60
-# The average number of requests Scrapy should be sending in parallel to
-# each remote server
-# AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0
-# Enable showing throttling stats for every response received:
-# AUTOTHROTTLE_DEBUG = False
-
-# Enable and configure HTTP caching (disabled by default)
-# See http://scrapy.readthedocs.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings
-# HTTPCACHE_ENABLED = True
-# HTTPCACHE_EXPIRATION_SECS = 0
-# HTTPCACHE_DIR = 'httpcache'
-# HTTPCACHE_IGNORE_HTTP_CODES = []
-# HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage'

diff --git a/spiders/example_juejin/juejin/spiders/__init__.py b/spiders/example_juejin/juejin/spiders/__init__.py
deleted file mode 100644
index ebd689ac5..000000000
--- a/spiders/example_juejin/juejin/spiders/__init__.py
+++ /dev/null
@@ -1,4 +0,0 @@
-# This package will contain the spiders of your Scrapy project
-#
-# Please refer to the documentation for information on how to create and manage
-# your spiders.

diff --git a/spiders/example_juejin/juejin/spiders/juejin_spider.py b/spiders/example_juejin/juejin/spiders/juejin_spider.py
deleted file mode 100644
index 28df5be71..000000000
--- a/spiders/example_juejin/juejin/spiders/juejin_spider.py
+++ /dev/null
@@ -1,17 +0,0 @@
-# -*- coding: utf-8 -*-
-import scrapy
-from juejin.items import JuejinItem
-
-
-class JuejinSpiderSpider(scrapy.Spider):
-    name = 'juejin_spider'
-    allowed_domains = ['juejin.com']
-    start_urls = ['https://juejin.im/search?query=celery']
-
-    def parse(self, response):
-        for item in response.css('ul.main-list > li.item'):
-            yield JuejinItem(
-                title=item.css('.title span').extract_first(),
-                link=item.css('a::attr("href")').extract_first(),
-                like=item.css('.like .count::text').extract_first(),
-            )

diff --git a/spiders/example_juejin/scrapy.cfg b/spiders/example_juejin/scrapy.cfg
deleted file mode 100644
index 38ba44f1e..000000000
--- a/spiders/example_juejin/scrapy.cfg
+++ /dev/null
@@ -1,11 +0,0 @@
-# Automatically created by: scrapy startproject
-#
-# For more information about the [deploy] section see:
-# https://scrapyd.readthedocs.org/en/latest/deploy.html
-
-[settings]
-default = juejin.settings
-
-[deploy]
-#url = http://localhost:6800/
-project = juejin

diff --git a/spiders/example_juejin/start.py b/spiders/example_juejin/start.py
deleted file mode 100644
index ec2f47dd5..000000000
--- a/spiders/example_juejin/start.py
+++ /dev/null
@@ -1,2 +0,0 @@
-from scrapy import cmdline
-cmdline.execute(["scrapy","crawl","juejin_spider"])
\ No newline at end of file

From fc15fa42ac3f8fff56bd95ece6a1a462f0ee7612 Mon Sep 17 00:00:00 2001
From: Marvin Zhang
Date: Sun, 23 Jun 2019 11:42:33 +0800
Subject: [PATCH 07/12] added xueqiu spider

---
 spiders/xueqiu/scrapy.cfg                  |  11 ++
 spiders/xueqiu/xueqiu/__init__.py          |   0
 spiders/xueqiu/xueqiu/items.py             |  20 ++++
 spiders/xueqiu/xueqiu/middlewares.py       | 103 ++++++++++++++++++
 spiders/xueqiu/xueqiu/pipelines.py         |  27 +++++
 spiders/xueqiu/xueqiu/settings.py          |  89 +++++++++++++++
 spiders/xueqiu/xueqiu/spiders/__init__.py  |   4 +
 .../xueqiu/xueqiu/spiders/xueqiu_spider.py |  43 ++++++++
 8 files changed, 297 insertions(+)
 create mode 100644 spiders/xueqiu/scrapy.cfg
 create mode 100644 spiders/xueqiu/xueqiu/__init__.py
 create mode 100644 spiders/xueqiu/xueqiu/items.py
 create mode 100644 spiders/xueqiu/xueqiu/middlewares.py
 create mode 100644 spiders/xueqiu/xueqiu/pipelines.py
 create mode 100644 spiders/xueqiu/xueqiu/settings.py
 create mode 100644 spiders/xueqiu/xueqiu/spiders/__init__.py
 create mode 100644 spiders/xueqiu/xueqiu/spiders/xueqiu_spider.py

diff --git a/spiders/xueqiu/scrapy.cfg b/spiders/xueqiu/scrapy.cfg
new file mode 100644
index 000000000..2c5ce3b3f
--- /dev/null
+++ b/spiders/xueqiu/scrapy.cfg
@@ -0,0 +1,11 @@
+# Automatically created by: scrapy startproject
+#
+# For more information about the [deploy] section see:
+# https://scrapyd.readthedocs.io/en/latest/deploy.html
+
+[settings]
+default = xueqiu.settings
+
+[deploy]
+#url = http://localhost:6800/
+project = xueqiu

diff --git a/spiders/xueqiu/xueqiu/__init__.py b/spiders/xueqiu/xueqiu/__init__.py
new file mode 100644
index 000000000..e69de29bb

diff --git a/spiders/xueqiu/xueqiu/items.py b/spiders/xueqiu/xueqiu/items.py
new file mode 100644
index 000000000..e50e4823b
--- /dev/null
+++ b/spiders/xueqiu/xueqiu/items.py
@@ -0,0 +1,20 @@
+# -*- coding: utf-8 -*-
+
+# Define here the models for your scraped items
+#
+# See documentation in:
+# https://doc.scrapy.org/en/latest/topics/items.html
+
+import scrapy
+
+
+class XueqiuItem(scrapy.Item):
+    # define the fields for your item here like:
+    _id = scrapy.Field()
+    task_id = scrapy.Field()
+    id = scrapy.Field()
+    text = scrapy.Field()
+    target = scrapy.Field()
+    view_count = scrapy.Field()
+    mark = scrapy.Field()
+    created_at = scrapy.Field()

diff --git a/spiders/xueqiu/xueqiu/middlewares.py b/spiders/xueqiu/xueqiu/middlewares.py
new file mode 100644
index 000000000..f60102ce4
--- /dev/null
+++ b/spiders/xueqiu/xueqiu/middlewares.py
@@ -0,0 +1,103 @@
+# -*- coding: utf-8 -*-
+
+# Define here the models for your spider middleware
+#
+# See documentation in:
+# https://doc.scrapy.org/en/latest/topics/spider-middleware.html
+
+from scrapy import signals
+
+
+class XueqiuSpiderMiddleware(object):
+    # Not all methods need to be defined. If a method is not defined,
+    # scrapy acts as if the spider middleware does not modify the
+    # passed objects.
+
+    @classmethod
+    def from_crawler(cls, crawler):
+        # This method is used by Scrapy to create your spiders.
+        s = cls()
+        crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
+        return s
+
+    def process_spider_input(self, response, spider):
+        # Called for each response that goes through the spider
+        # middleware and into the spider.
+
+        # Should return None or raise an exception.
+        return None
+
+    def process_spider_output(self, response, result, spider):
+        # Called with the results returned from the Spider, after
+        # it has processed the response.
+
+        # Must return an iterable of Request, dict or Item objects.
+        for i in result:
+            yield i
+
+    def process_spider_exception(self, response, exception, spider):
+        # Called when a spider or process_spider_input() method
+        # (from other spider middleware) raises an exception.
+
+        # Should return either None or an iterable of Response, dict
+        # or Item objects.
+        pass
+
+    def process_start_requests(self, start_requests, spider):
+        # Called with the start requests of the spider, and works
+        # similarly to the process_spider_output() method, except
+        # that it doesn’t have a response associated.
+
+        # Must return only requests (not items).
+        for r in start_requests:
+            yield r
+
+    def spider_opened(self, spider):
+        spider.logger.info('Spider opened: %s' % spider.name)
+
+
+class XueqiuDownloaderMiddleware(object):
+    # Not all methods need to be defined. If a method is not defined,
+    # scrapy acts as if the downloader middleware does not modify the
+    # passed objects.
+
+    @classmethod
+    def from_crawler(cls, crawler):
+        # This method is used by Scrapy to create your spiders.
+        s = cls()
+        crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
+        return s
+
+    def process_request(self, request, spider):
+        # Called for each request that goes through the downloader
+        # middleware.
+
+        # Must either:
+        # - return None: continue processing this request
+        # - or return a Response object
+        # - or return a Request object
+        # - or raise IgnoreRequest: process_exception() methods of
+        #   installed downloader middleware will be called
+        return None
+
+    def process_response(self, request, response, spider):
+        # Called with the response returned from the downloader.
+
+        # Must either;
+        # - return a Response object
+        # - return a Request object
+        # - or raise IgnoreRequest
+        return response
+
+    def process_exception(self, request, exception, spider):
+        # Called when a download handler or a process_request()
+        # (from other downloader middleware) raises an exception.
+
+        # Must either:
+        # - return None: continue processing this exception
+        # - return a Response object: stops process_exception() chain
+        # - return a Request object: stops process_exception() chain
+        pass
+
+    def spider_opened(self, spider):
+        spider.logger.info('Spider opened: %s' % spider.name)

diff --git a/spiders/xueqiu/xueqiu/pipelines.py b/spiders/xueqiu/xueqiu/pipelines.py
new file mode 100644
index 000000000..5fc397f29
--- /dev/null
+++ b/spiders/xueqiu/xueqiu/pipelines.py
@@ -0,0 +1,27 @@
+# -*- coding: utf-8 -*-
+
+# Define your item pipelines here
+#
+# Don't forget to add your pipeline to the ITEM_PIPELINES setting
+# See: https://doc.scrapy.org/en/latest/topics/item-pipeline.html
+import os
+
+from pymongo import MongoClient
+
+
+
+
+class XueqiuPipeline(object):
+    mongo = MongoClient(
+        host='localhost',
+        port=27017
+    )
+    db = mongo['crawlab_test']
+    col = db.get_collection(os.environ.get('CRAWLAB_COLLECTION') or 'results_xueqiu')
+
+    def process_item(self, item, spider):
+        item['task_id'] = os.environ.get('CRAWLAB_TASK_ID')
+        item['_id'] = item['id']
+        if self.col.find_one({'_id': item['_id']}) is None:
+            self.col.save(item)
+        return item

diff --git a/spiders/xueqiu/xueqiu/settings.py b/spiders/xueqiu/xueqiu/settings.py
new file mode 100644
index 000000000..b44a74e19
--- /dev/null
+++ b/spiders/xueqiu/xueqiu/settings.py
@@ -0,0 +1,89 @@
+# -*- coding: utf-8 -*-
+
+# Scrapy settings for xueqiu project
+#
+# For simplicity, this file contains only settings considered important or
+# commonly used. You can find more settings consulting the documentation:
+#
+# https://doc.scrapy.org/en/latest/topics/settings.html
+# https://doc.scrapy.org/en/latest/topics/downloader-middleware.html
+# https://doc.scrapy.org/en/latest/topics/spider-middleware.html
+
+BOT_NAME = 'xueqiu'
+
+SPIDER_MODULES = ['xueqiu.spiders']
+NEWSPIDER_MODULE = 'xueqiu.spiders'
+
+# Crawl responsibly by identifying yourself (and your website) on the user-agent
+USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36'
+
+# Obey robots.txt rules
+ROBOTSTXT_OBEY = True
+
+# Configure maximum concurrent requests performed by Scrapy (default: 16)
+# CONCURRENT_REQUESTS = 32
+
+# Configure a delay for requests for the same website (default: 0)
+# See https://doc.scrapy.org/en/latest/topics/settings.html#download-delay
+# See also autothrottle settings and docs
+# DOWNLOAD_DELAY = 3
+# The download delay setting will honor only one of:
+# CONCURRENT_REQUESTS_PER_DOMAIN = 16
+# CONCURRENT_REQUESTS_PER_IP = 16
+
+# Disable cookies (enabled by default)
+# COOKIES_ENABLED = False
+
+# Disable Telnet Console (enabled by default)
+# TELNETCONSOLE_ENABLED = False
+
+# Override the default request headers:
+# DEFAULT_REQUEST_HEADERS = {
+#   'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
+#   'Accept-Language': 'en',
+# }
+
+# Enable or disable spider middlewares
+# See https://doc.scrapy.org/en/latest/topics/spider-middleware.html
+# SPIDER_MIDDLEWARES = {
+#    'xueqiu.middlewares.XueqiuSpiderMiddleware': 543,
+# }
+
+# Enable or disable downloader middlewares
+# See https://doc.scrapy.org/en/latest/topics/downloader-middleware.html
+# DOWNLOADER_MIDDLEWARES = {
+#    'xueqiu.middlewares.XueqiuDownloaderMiddleware': 543,
+# }
+
+# Enable or disable extensions
+# See https://doc.scrapy.org/en/latest/topics/extensions.html
+# EXTENSIONS = {
+#    'scrapy.extensions.telnet.TelnetConsole': None,
+# }
+
+# Configure item pipelines
+# See https://doc.scrapy.org/en/latest/topics/item-pipeline.html
+ITEM_PIPELINES = {
+    'xueqiu.pipelines.XueqiuPipeline': 300,
+}
+
+# Enable and configure the AutoThrottle extension (disabled by default)
+# See https://doc.scrapy.org/en/latest/topics/autothrottle.html
+# AUTOTHROTTLE_ENABLED = True
+# The initial download delay
+# AUTOTHROTTLE_START_DELAY = 5
+# The maximum download delay to be set in case of high latencies
+# AUTOTHROTTLE_MAX_DELAY = 60
+# The average number of requests Scrapy should be sending in parallel to
+# each remote server
+# AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0
+# Enable showing throttling stats for every response received:
+# AUTOTHROTTLE_DEBUG = False
+
+# Enable and configure HTTP caching (disabled by default)
+# See https://doc.scrapy.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings
+# HTTPCACHE_ENABLED = True
+# HTTPCACHE_EXPIRATION_SECS = 0
+# HTTPCACHE_DIR = 'httpcache'
+# HTTPCACHE_IGNORE_HTTP_CODES = []
+# HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage'

diff --git a/spiders/xueqiu/xueqiu/spiders/__init__.py b/spiders/xueqiu/xueqiu/spiders/__init__.py
new file mode 100644
index 000000000..ebd689ac5
--- /dev/null
+++ b/spiders/xueqiu/xueqiu/spiders/__init__.py
@@ -0,0 +1,4 @@
+# This package will contain the spiders of your Scrapy project
+#
+# Please refer to the documentation for information on how to create and manage
+# your spiders.

diff --git a/spiders/xueqiu/xueqiu/spiders/xueqiu_spider.py b/spiders/xueqiu/xueqiu/spiders/xueqiu_spider.py
new file mode 100644
index 000000000..6ccb13c09
--- /dev/null
+++ b/spiders/xueqiu/xueqiu/spiders/xueqiu_spider.py
@@ -0,0 +1,43 @@
+# -*- coding: utf-8 -*-
+import json
+from time import sleep
+
+import scrapy
+
+from xueqiu.items import XueqiuItem
+
+
+class XueqiuSpiderSpider(scrapy.Spider):
+    name = 'xueqiu_spider'
+    allowed_domains = ['xueqiu.com']
+
+    def start_requests(self):
+        return [scrapy.Request(
+            url='https://xueqiu.com',
+            callback=self.parse_home
+        )]
+
+    def parse_home(self, response):
+        yield scrapy.Request(
+            url='https://xueqiu.com/v4/statuses/public_timeline_by_category.json?since_id=-1&max_id=-1&count=20&category=6'
+        )
+
+    def parse(self, response):
+        data = json.loads(response.body)
+        next_max_id = data.get('next_max_id')
+        sleep(1)
+        for row in data.get('list'):
+            d = json.loads(row.get('data'))
+            item = XueqiuItem(
+                id=d['id'],
+                text=d['text'],
+                mark=d['mark'],
+                target=d['target'],
+                created_at=d['created_at'],
+                view_count=d['view_count'],
+            )
+            yield item
+
+        yield scrapy.Request(
+            url=f'https://xueqiu.com/v4/statuses/public_timeline_by_category.json?since_id=-1&max_id={next_max_id}&count=20&category=6'
+        )

From 29cea71ab8f2624e84309bb57d0a278ddc78dfbc Mon Sep 17 00:00:00 2001
From: Marvin Zhang
Date: Sun, 23 Jun 2019 11:42:43 +0800
Subject: [PATCH 08/12] added xueqiu spider

---
 spiders/xueqiu/xueqiu/pipelines.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/spiders/xueqiu/xueqiu/pipelines.py b/spiders/xueqiu/xueqiu/pipelines.py
index 5fc397f29..6785ace14 100644
--- a/spiders/xueqiu/xueqiu/pipelines.py
+++ b/spiders/xueqiu/xueqiu/pipelines.py
@@ -9,8 +9,6 @@
 from pymongo import MongoClient
 
 
-
-
 class XueqiuPipeline(object):
     mongo = MongoClient(
         host='localhost',

From 3a86b1b2f7ebe3eb3de02b9b642c92dbd1652d87 Mon Sep 17 00:00:00 2001
From: Marvin Zhang
Date: Sun, 23 Jun 2019 13:58:04 +0800
Subject: [PATCH 09/12] updated xueqiu spider

---
 spiders/xueqiu/xueqiu/pipelines.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/spiders/xueqiu/xueqiu/pipelines.py b/spiders/xueqiu/xueqiu/pipelines.py
index 6785ace14..16753bb6c 100644
--- a/spiders/xueqiu/xueqiu/pipelines.py
+++ b/spiders/xueqiu/xueqiu/pipelines.py
@@ -22,4 +22,4 @@ def process_item(self, item, spider):
         item['_id'] = item['id']
         if self.col.find_one({'_id': item['_id']}) is None:
             self.col.save(item)
-        return item
+        return item

From d92abb50d218af88481ca952a17c6c6be6f8e1c1 Mon Sep 17 00:00:00 2001
From: Marvin Zhang
Date: Sun, 23 Jun 2019 20:57:22 +0800
Subject: [PATCH 10/12] updated xueqiu spider

---
 spiders/xueqiu/xueqiu/pipelines.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/spiders/xueqiu/xueqiu/pipelines.py b/spiders/xueqiu/xueqiu/pipelines.py
index 16753bb6c..a5f9c02bf 100644
--- a/spiders/xueqiu/xueqiu/pipelines.py
+++ b/spiders/xueqiu/xueqiu/pipelines.py
@@ -11,10 +11,10 @@ class XueqiuPipeline(object):
     mongo = MongoClient(
-        host='localhost',
-        port=27017
+        host=os.environ.get('MONGO_HOST') or 'localhost',
+        port=int(os.environ.get('MONGO_DB')) or 27017
     )
-    db = mongo['crawlab_test']
+    db = mongo[os.environ.get('MONGO_DB') or 'crawlab_test']
     col = db.get_collection(os.environ.get('CRAWLAB_COLLECTION') or 'results_xueqiu')

From 9f333f025c91be476e3ad0675ed0fc61372ac057 Mon Sep 17 00:00:00 2001
From: Marvin Zhang
Date: Sun, 23 Jun 2019 21:08:47 +0800
Subject: [PATCH 11/12] updated xueqiu spider

---
 spiders/xueqiu/xueqiu/pipelines.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/spiders/xueqiu/xueqiu/pipelines.py b/spiders/xueqiu/xueqiu/pipelines.py
index a5f9c02bf..90a86da20 100644
--- a/spiders/xueqiu/xueqiu/pipelines.py
+++ b/spiders/xueqiu/xueqiu/pipelines.py
@@ -12,7 +12,7 @@ class XueqiuPipeline(object):
     mongo = MongoClient(
         host=os.environ.get('MONGO_HOST') or 'localhost',
-        port=int(os.environ.get('MONGO_DB')) or 27017
+        port=int(os.environ.get('MONGO_PORT')) or 27017
     )
     db = mongo[os.environ.get('MONGO_DB') or 'crawlab_test']
     col = db.get_collection(os.environ.get('CRAWLAB_COLLECTION') or 'results_xueqiu')

From d66f4d4b398d249a3279f8fb09ed1aaad58cae23 Mon Sep 17 00:00:00 2001
From: Marvin Zhang
Date: Sat, 29 Jun 2019 11:52:41 +0800
Subject: [PATCH 12/12] fixed https://github.com/tikazyq/crawlab/issues/74

---
 crawlab/tasks/spider.py            | 8 ++++++--
 spiders/xueqiu/xueqiu/pipelines.py | 2 +-
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/crawlab/tasks/spider.py b/crawlab/tasks/spider.py
index e03bab66e..3bda236c5 100644
--- a/crawlab/tasks/spider.py
+++ b/crawlab/tasks/spider.py
@@ -2,6 +2,7 @@
 import sys
 from datetime import datetime
 from time import sleep
+import traceback
 
 from bson import ObjectId
 from pymongo import ASCENDING, DESCENDING
@@ -213,8 +214,10 @@ def execute_config_spider(self, id: str, params: str = None):
     env['MONGO_HOST'] = MONGO_HOST
     env['MONGO_PORT'] = str(MONGO_PORT)
     env['MONGO_DB'] = MONGO_DB
-    env['MONGO_USERNAME'] = MONGO_USERNAME
-    env['MONGO_PASSWORD'] = MONGO_PASSWORD
+    if MONGO_USERNAME is not None:
+        env['MONGO_USERNAME'] = MONGO_USERNAME
+    if MONGO_PASSWORD:
+        env['MONGO_PASSWORD'] = MONGO_PASSWORD
 
     cmd_arr = [
         sys.executable,
@@ -246,6 +249,7 @@ def execute_config_spider(self, id: str, params: str = None):
         else:
             status = TaskStatus.FAILURE
     except Exception as err:
+        traceback.print_exc()
         logger.error(err)
         stderr.write(str(err))
         status = TaskStatus.FAILURE

diff --git a/spiders/xueqiu/xueqiu/pipelines.py b/spiders/xueqiu/xueqiu/pipelines.py
index 90a86da20..671737725 100644
--- a/spiders/xueqiu/xueqiu/pipelines.py
+++ b/spiders/xueqiu/xueqiu/pipelines.py
@@ -12,7 +12,7 @@ class XueqiuPipeline(object):
     mongo = MongoClient(
         host=os.environ.get('MONGO_HOST') or 'localhost',
-        port=int(os.environ.get('MONGO_PORT')) or 27017
+        port=int(os.environ.get('MONGO_PORT') or 27017)
     )
     db = mongo[os.environ.get('MONGO_DB') or 'crawlab_test']
     col = db.get_collection(os.environ.get('CRAWLAB_COLLECTION') or 'results_xueqiu')
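
Note on the last three patches: they all iterate on one line, reading MONGO_PORT from the environment with a default, and the whole fix in PATCH 12 is the position of the closing parenthesis. A minimal standalone sketch of why that matters (not part of any commit in this series; the helper name mongo_port is hypothetical):

# Standalone sketch, not from the patch series: why PATCH 12 moves the
# closing parenthesis in the MongoDB port lookup.


def mongo_port(env):
    # PATCH 12 form: `or` supplies the default while the value is still a
    # string (or None); int() then converts exactly once.
    return int(env.get('MONGO_PORT') or 27017)


assert mongo_port({}) == 27017                       # unset -> default
assert mongo_port({'MONGO_PORT': '27018'}) == 27018  # set -> parsed

# PATCH 11 form, int(env.get('MONGO_PORT')) or 27017, evaluates int(None)
# before the `or` fallback can apply, so an unset variable crashes.
try:
    int({}.get('MONGO_PORT')) or 27017
except TypeError:
    pass  # the failure mode PATCH 12 removes

The same ordering applies to any numeric environment variable with a fallback: apply the default to the raw string first, then convert.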