-
Notifications
You must be signed in to change notification settings - Fork 2
/
start-crawl
executable file
·47 lines (36 loc) · 1.47 KB
/
start-crawl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
#!/usr/bin/env bash
#######################################
# Resolve one configuration value and store it in the global variable that
# carries the same name as the setting.
#
# A non-empty value already held by that variable is kept untouched.
# Otherwise these sources are consulted in order and the first non-empty hit
# wins:
#   1. .spider_args       of the JSON document in $SHUB_JOB_DATA
#   2. .job_settings      of the JSON document in $SHUB_SETTINGS
#   3. .spider_settings   of the JSON document in $SHUB_SETTINGS
#   4. .project_settings  of the JSON document in $SHUB_SETTINGS
# When no source provides a value the variable is set to the empty string.
#
# Globals:   SHUB_JOB_DATA, SHUB_SETTINGS (read)
#            the variable named by $1 (written)
# Arguments: $1 - setting name; also the name of the variable to fill
# Returns:   0 on success, 2 if $1 is not a valid variable name
# Requires:  jq
#######################################
read_config() {
  # Locals are prefixed so they cannot shadow the variable being filled.
  local _rc_name=$1
  local _rc_value=""
  local _rc_spec _rc_json_var _rc_section

  # The name is used for indirect expansion and assignment; refuse anything
  # that is not a plain identifier.
  if [[ ! "$_rc_name" =~ ^[A-Za-z_][A-Za-z0-9_]*$ ]]; then
    echo "read_config: invalid setting name: $_rc_name" >&2
    return 2
  fi

  # A value that is already present takes priority over every JSON source.
  if [[ -n "${!_rc_name:-}" ]]; then
    return 0
  fi

  # Each entry is "<variable holding the JSON>:<top-level section>", listed
  # from highest to lowest priority.
  for _rc_spec in \
    'SHUB_JOB_DATA:spider_args' \
    'SHUB_SETTINGS:job_settings' \
    'SHUB_SETTINGS:spider_settings' \
    'SHUB_SETTINGS:project_settings'; do
    _rc_json_var=${_rc_spec%%:*}
    _rc_section=${_rc_spec#*:}

    # Nothing to parse when the JSON variable is unset or empty.
    [[ -n "${!_rc_json_var:-}" ]] || continue

    # -r prints strings without JSON quoting; select() drops a missing key
    # (null) without discarding real values that merely contain "null".
    _rc_value=$(
      printf '%s' "${!_rc_json_var}" \
        | jq -r --arg section "$_rc_section" --arg key "$_rc_name" \
          '.[$section][$key] | select(. != null)'
    )

    # Stop at the first source that actually provides a value.
    [[ -z "$_rc_value" ]] || break
  done

  # Assign without eval so quotes or other shell metacharacters in the value
  # are stored literally.
  printf -v "$_rc_name" '%s' "$_rc_value"
}
# Entry point: resolve the Crawlera settings, start the headless proxy in the
# background, run the regular crawl, then stop the proxy and report the
# crawl's own exit status.

# Start from an empty option string; read_config is then asked to fill it in.
CRAWLERA_HEADLESS_PROXY_OPTIONS=""
read_config 'CRAWLERA_APIKEY'
read_config 'CRAWLERA_HEADLESS_PROXY_OPTIONS'

# Check if we do have an API Key to work with
if [[ -z "$CRAWLERA_APIKEY" ]]; then
  echo "FATAL ERROR: CRAWLERA_APIKEY setting not defined. Aborting" >&2
  exit 1
fi

# Split the option string into separate arguments on whitespace. Reading into
# an array keeps the word splitting but avoids glob expansion of the options.
# With -d '' read consumes the whole string and reports EOF as a non-zero
# status, which is expected here.
proxy_options=()
read -r -d '' -a proxy_options <<<"$CRAWLERA_HEADLESS_PROXY_OPTIONS" || true

crawlera-headless-proxy -a "$CRAWLERA_APIKEY" "${proxy_options[@]}" &
proxy_pid=$!

# Stop exactly the proxy process started above. Registered as an EXIT trap so
# it also runs when the script leaves early, letting the job finish.
stop_proxy() {
  kill "$proxy_pid" 2>/dev/null
}
trap stop_proxy EXIT

# Only the first five characters of the key are logged.
echo "Started Crawlera headless proxy using \"$CRAWLERA_HEADLESS_PROXY_OPTIONS\" options, and API Key ${CRAWLERA_APIKEY:0:5}..."

# Run regular start-crawl
/usr/bin/env python -m sh_scrapy.crawl
crawl_status=$?

# Report the crawl's result instead of masking failures with a fixed 0; the
# EXIT trap takes care of stopping the proxy.
exit "$crawl_status"