+ {% for harvester in h.harvesters_info() %}
+ {% set checked = False %}
+@@ -46,7 +46,11 @@
+ {{ form.select('frequency', id='field-frequency', label=_('Update frequency'), options=h.harvest_frequencies(), selected=data.frequency, error=errors.frequency) }}
+
+ {% block extra_config %}
+- {{ form.textarea('config', id='field-config', label=_('Configuration'), value=data.config, error=errors.config) }}
++ {% call form.textarea('config', id='field-config', label=_('Configuration'), value=data.config, error=errors.config) %}
++
++ {{ _('You can validate the JSON at: ') }} {{ _('JSONLint') }}
++
++ {% endcall %}
+ {% endblock extra_config %}
+
+ {# if we have a default group then this wants remembering #}
+diff --git a/ckanext/harvest/templates/source/search.html b/ckanext/harvest/templates/source/search.html
+index d9ceeea..44d118b 100644
+--- a/ckanext/harvest/templates/source/search.html
++++ b/ckanext/harvest/templates/source/search.html
+@@ -44,7 +44,26 @@
+
+
+
+-{% block secondary_content %}
++ {% block secondary_content %}
++
++ {{ _('Harvest sources') }}
++
++
++ {% trans %}
++ Harvest sources allow importing remote metadata into this catalog. Remote sources can be other catalogs such as other CKAN instances, CSW servers, XML metadata files, XLSX with metadata records or Web Accessible Folder (WAF).
++ {% endtrans %}
++
++
++
++ {{ _('Depending on the actual harvesters enabled for this instance. eg: ') }}
++
++
++
++
+ {% for facet in c.facet_titles %}
+ {{ h.snippet('snippets/facet_list.html', title=c.facet_titles[facet], name=facet, alternative_url=h.url_for('{0}.search'.format(c.dataset_type))) }}
+ {% endfor %}
diff --git a/ckan/req_fixes/ckanext-spatial_requirements.txt b/ckan/req_fixes/ckanext-spatial_requirements.txt
index 0c15f3c0..b86d5173 100644
--- a/ckan/req_fixes/ckanext-spatial_requirements.txt
+++ b/ckan/req_fixes/ckanext-spatial_requirements.txt
@@ -3,12 +3,10 @@ lxml>=2.3
argparse
pyparsing>=2.1.10
requests>=1.1.0
-six
-
-# requirements pyproj fix: https://github.com/pyproj4/pyproj/issues/1321
+cython==0.29.36; python_version < '3.9'
pyproj==2.6.1; python_version < '3.9'
pyproj==3.6.1; python_version >= '3.9'
Shapely==2.0.1
OWSLib==0.28.1
-geojson==3.0.1
\ No newline at end of file
+geojson==3.0.1
diff --git a/ckan/setup/start_ckan.sh.override b/ckan/setup/start_ckan.sh.override
index ce6eebde..84656952 100644
--- a/ckan/setup/start_ckan.sh.override
+++ b/ckan/setup/start_ckan.sh.override
@@ -1,22 +1,18 @@
#!/bin/sh
-# Add ckan.datapusher.api_token to the CKAN config file (updated with corrected value later)
-ckan config-tool $CKAN_INI ckan.datapusher.api_token=xxx
-
# Set up the Secret key used by Beaker and Flask
# This can be overriden using a CKAN___BEAKER__SESSION__SECRET env var
if grep -E "beaker.session.secret ?= ?$" ckan.ini
then
echo "Setting beaker.session.secret in ini file"
ckan config-tool $CKAN_INI "beaker.session.secret=$(python3 -c 'import secrets; print(secrets.token_urlsafe())')"
- ckan config-tool $CKAN_INI "WTF_CSRF_SECRET_KEY=$(python3 -c 'import secrets; print(secrets.token_urlsafe())')"
JWT_SECRET=$(python3 -c 'import secrets; print("string:" + secrets.token_urlsafe())')
ckan config-tool $CKAN_INI "api_token.jwt.encode.secret=${JWT_SECRET}"
ckan config-tool $CKAN_INI "api_token.jwt.decode.secret=${JWT_SECRET}"
fi
# Run the prerun script to init CKAN and create the default admin user
-sudo -u ckan -EH python3 prerun.py
+python3 prerun.py
# Run any startup scripts provided by images extending this one
if [[ -d "/docker-entrypoint.d" ]]
@@ -31,6 +27,14 @@ then
done
fi
+# Create Harvester logs directory and change its ownership
+mkdir -p $CKAN_LOGS_PATH/harvester
+chown -R ckan:ckan $CKAN_LOGS_PATH/harvester
+
+# Create xloader logs directory and change its ownership
+mkdir -p $CKAN_LOGS_PATH/xloader
+chown -R ckan:ckan $CKAN_LOGS_PATH/xloader
+
# Set the common uwsgi options
UWSGI_OPTS="--plugins http,python \
--socket /tmp/uwsgi.sock \
@@ -46,9 +50,18 @@ UWSGI_OPTS="--plugins http,python \
if [ $? -eq 0 ]
then
# Start supervisord
+ echo "[prerun.workers] Loading the CKAN workers with supervisord..."
supervisord --configuration /etc/supervisord.conf &
+
+ # Workers
+ ## Add harvester background process to crontab
+ echo "[prerun.workers] Add harvester background procceses to crontab"
+ crontab -l | { cat; echo "*/15 * * * * /usr/bin/supervisorctl start ckan_harvester_run"; } | crontab -
+ ## Clean-up mechanism for the harvest log table; the time frame is set by 'ckan.harvest.log_timeframe' (default: 30 days)
+ crontab -l | { cat; echo "0 5 */30 * * /usr/bin/supervisorctl start ckan_harvester_clean_log"; } | crontab -
+
# Start uwsgi
- sudo -u ckan -EH uwsgi $UWSGI_OPTS
+ uwsgi $UWSGI_OPTS
else
echo "[prerun] failed...not starting CKAN."
-fi
\ No newline at end of file
+fi
diff --git a/ckan/setup/start_ckan_development.sh.override b/ckan/setup/start_ckan_development.sh.override
index 8dcc7465..b481c807 100644
--- a/ckan/setup/start_ckan_development.sh.override
+++ b/ckan/setup/start_ckan_development.sh.override
@@ -45,16 +45,12 @@ done
echo "Enabling debug mode"
ckan config-tool $CKAN_INI -s DEFAULT "debug = true"
-# Add ckan.datapusher.api_token to the CKAN config file (updated with corrected value later)
-ckan config-tool $CKAN_INI ckan.datapusher.api_token=xxx
-
# Set up the Secret key used by Beaker and Flask
# This can be overriden using a CKAN___BEAKER__SESSION__SECRET env var
if grep -E "beaker.session.secret ?= ?$" ckan.ini
then
echo "Setting beaker.session.secret in ini file"
ckan config-tool $CKAN_INI "beaker.session.secret=$(python3 -c 'import secrets; print(secrets.token_urlsafe())')"
- ckan config-tool $CKAN_INI "WTF_CSRF_SECRET_KEY=$(python3 -c 'import secrets; print(secrets.token_urlsafe())')"
JWT_SECRET=$(python3 -c 'import secrets; print("string:" + secrets.token_urlsafe())')
ckan config-tool $CKAN_INI "api_token.jwt.encode.secret=${JWT_SECRET}"
ckan config-tool $CKAN_INI "api_token.jwt.decode.secret=${JWT_SECRET}"
@@ -74,7 +70,7 @@ ckan config-tool $SRC_DIR/ckan/test-core.ini \
"ckan.redis.url = $TEST_CKAN_REDIS_URL"
# Run the prerun script to init CKAN and create the default admin user
-sudo -u ckan -EH python3 prerun.py
+python3 prerun.py
# Run any startup scripts provided by images extending this one
if [[ -d "/docker-entrypoint.d" ]]
@@ -89,8 +85,22 @@ then
done
fi
+# Create Harvester logs directory and change its ownership
+mkdir -p $CKAN_LOGS_PATH/harvester
+chown -R ckan:ckan $CKAN_LOGS_PATH/harvester
+
+# Create xloader logs directory and change its ownership
+mkdir -p $CKAN_LOGS_PATH/xloader
+chown -R ckan:ckan $CKAN_LOGS_PATH/xloader
+
# Start supervisord
-supervisord --configuration /etc/supervisord.conf &
+#supervisord --configuration /etc/supervisord.conf &
+
+# Start the development server as the ckan user with automatic reload
+su ckan -c "/usr/bin/ckan -c $CKAN_INI run -H 0.0.0.0"
-# Start the development server with automatic reload
-sudo -u ckan -EH ckan -c $CKAN_INI run -H 0.0.0.0
\ No newline at end of file
+# Workers
+# To start the Harvester worker
+# ckan harvester run
+# Clean-up mechanism for the harvest log table
+# ckan harvester clean-harvest-log
\ No newline at end of file
diff --git a/ckan/setup/workers/harvester.conf b/ckan/setup/workers/harvester.conf
new file mode 100644
index 00000000..cb1c2e3d
--- /dev/null
+++ b/ckan/setup/workers/harvester.conf
@@ -0,0 +1,51 @@
+[program:ckan_gather_consumer]
+command=ckan harvester gather-consumer
+user=ckan
+numprocs=1
+stdout_logfile=/var/log/harvester/gather_consumer.log
+stdout_logfile_maxbytes=50MB
+stderr_logfile=/var/log/harvester/gather_consumer.log
+stderr_logfile_maxbytes=50MB
+autostart=true
+autorestart=true
+startsecs=10
+priority=1
+
+[program:ckan_fetch_consumer]
+command=ckan harvester fetch-consumer
+user=ckan
+numprocs=1
+stdout_logfile=/var/log/harvester/fetch_consumer.log
+stdout_logfile_maxbytes=50MB
+stderr_logfile=/var/log/harvester/fetch_consumer.log
+stderr_logfile_maxbytes=50MB
+autostart=true
+autorestart=true
+startsecs=10
+priority=2
+
+[program:ckan_harvester_run]
+command=ckan harvester run
+user=ckan
+numprocs=1
+stdout_logfile=/var/log/harvester/ckan_harvester.log
+stdout_logfile_maxbytes=25MB
+stderr_logfile=/var/log/harvester/ckan_harvester.log
+stderr_logfile_maxbytes=25MB
+autostart=true
+autorestart=false ; not restarted by supervisord: re-triggered every 15 minutes by the crontab entry added in start_ckan.sh.override
+startsecs=2
+priority=3
+
+[program:ckan_harvester_clean_log]
+command=ckan harvester clean-harvest-log
+user=ckan
+numprocs=1
+stdout_logfile=/var/log/harvester/ckan_harvester_clean_log.log
+stdout_logfile_maxbytes=25MB
+stderr_logfile=/var/log/harvester/ckan_harvester_clean_log.log
+stderr_logfile_maxbytes=25MB
+autostart=false ; started only on demand by the crontab entry added in start_ckan.sh.override
+autorestart=false
+startsecs=2
+priority=4
\ No newline at end of file
diff --git a/ckan/setup/workers/xloader.conf b/ckan/setup/workers/xloader.conf
new file mode 100644
index 00000000..c7749ddb
--- /dev/null
+++ b/ckan/setup/workers/xloader.conf
@@ -0,0 +1,12 @@
+[program:ckan_xloader]
+command=ckan jobs worker default
+user=ckan
+numprocs=1
+stdout_logfile=/var/log/xloader/ckan_xloader.log ; directory created by the start_ckan*.sh.override scripts
+stdout_logfile_maxbytes=100MB
+stderr_logfile=/var/log/xloader/ckan_xloader.log
+stderr_logfile_maxbytes=100MB
+autostart=true
+autorestart=true
+startsecs=4
+priority=1
\ No newline at end of file
diff --git a/docker-compose.yml b/docker-compose.yml
index 33ffedf1..78e93a74 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -3,6 +3,7 @@ version: "3"
volumes:
ckan_storage:
+ ckan_logs:
pg_data:
solr_data:
@@ -57,6 +58,7 @@ services:
condition: service_healthy
volumes:
- ckan_storage:/var/lib/ckan
+ - ckan_logs:/var/log
restart: unless-stopped
healthcheck:
test: ["CMD", "wget", "-qO", "/dev/null", "http://localhost:${CKAN_PORT}"]
diff --git a/samples/.env.apache.example b/samples/.env.apache.example
index 8615b731..bc9de228 100644
--- a/samples/.env.apache.example
+++ b/samples/.env.apache.example
@@ -89,6 +89,7 @@ CKAN_SYSADMIN_NAME=ckan_admin
CKAN_SYSADMIN_PASSWORD=test1234
CKAN_SYSADMIN_EMAIL=your_email@example.com
CKAN_STORAGE_PATH=/var/lib/ckan
+CKAN_LOGS_PATH=/var/log
CKAN_SMTP_SERVER=smtp.corporateict.domain:25
CKAN_SMTP_STARTTLS=True
CKAN_SMTP_USER=user
diff --git a/samples/.env.localhost b/samples/.env.localhost
index 880f2a9a..25434e53 100644
--- a/samples/.env.localhost
+++ b/samples/.env.localhost
@@ -97,6 +97,7 @@ CKAN_SYSADMIN_NAME=ckan_admin
CKAN_SYSADMIN_PASSWORD=test1234
CKAN_SYSADMIN_EMAIL=your_email@example.com
CKAN_STORAGE_PATH=/var/lib/ckan
+CKAN_LOGS_PATH=/var/log
CKAN_SMTP_SERVER=smtp.corporateict.domain:25
CKAN_SMTP_STARTTLS=True
CKAN_SMTP_USER=user
diff --git a/samples/.env.nginx.example b/samples/.env.nginx.example
index c80c8e8e..fcd5ad36 100644
--- a/samples/.env.nginx.example
+++ b/samples/.env.nginx.example
@@ -89,6 +89,7 @@ CKAN_SYSADMIN_NAME=ckan_admin
CKAN_SYSADMIN_PASSWORD=test1234
CKAN_SYSADMIN_EMAIL=your_email@example.com
CKAN_STORAGE_PATH=/var/lib/ckan
+CKAN_LOGS_PATH=/var/log
CKAN_SMTP_SERVER=smtp.corporateict.domain:25
CKAN_SMTP_STARTTLS=True
CKAN_SMTP_USER=user
diff --git a/samples/custom/.env.es.example b/samples/custom/.env.es.example
index 1ae16c2b..10c9efc3 100644
--- a/samples/custom/.env.es.example
+++ b/samples/custom/.env.es.example
@@ -97,6 +97,7 @@ CKAN_SYSADMIN_NAME=ckan_admin
CKAN_SYSADMIN_PASSWORD=test1234
CKAN_SYSADMIN_EMAIL=your_email@example.com
CKAN_STORAGE_PATH=/var/lib/ckan
+CKAN_LOGS_PATH=/var/log
CKAN_SMTP_SERVER=smtp.corporateict.domain:25
CKAN_SMTP_STARTTLS=True
CKAN_SMTP_USER=user