diff --git a/.gitignore b/.gitignore index a28742410..df617f797 100644 --- a/.gitignore +++ b/.gitignore @@ -13,3 +13,4 @@ development.ini node_modules *.project .eggs +.vscode/ \ No newline at end of file diff --git a/README.rst b/README.rst index 49bef88a9..1f1ec2dc8 100644 --- a/README.rst +++ b/README.rst @@ -187,10 +187,14 @@ If you don't specify this setting, the default will be number-sequence. Send error mails when harvesting fails (optional) ================================================= -If you want to send an email when a Harvest Job fails, you can set the following configuration option in the ini file: +If you want to send an email when a **Harvest Job fails**, you can set the following configuration option in the ini file: ckan.harvest.status_mail.errored = True +If you want to send an email when **any Harvest Job finishes** (whether or not it failed), you can set the following configuration option in the ini file: + + ckan.harvest.status_mail.all = True + That way, all CKAN Users who are declared as Sysadmins will receive the Error emails at their configured email address. If the Harvest-Source of the failing Harvest-Job belongs to an organization, the error-mail will also be sent to the organization-members who have the admin-role if their E-Mail is configured. If you don't specify this setting, the default will be False. 
def get_mail_extra_vars(context, source_id, status):
    """Collect the template variables used by the harvest notification mails.

    :param context: CKAN action context, passed through to every
        ``get_action`` call made here.
    :param source_id: id of the harvest source the job belongs to.
    :param status: dict whose ``last_job`` entry provides the job ``id``,
        ``created``, ``finished`` and ``stats`` values.
    :returns: dict of variables rendered by ``emails/summary_email.txt``
        and ``emails/error_email.txt``.
    """
    last_job = status['last_job']
    stats = last_job['stats']

    # NOTE(review): harvest_object_list filters on the 'source_id' key; an
    # 'id' key appears to be ignored, which would list the objects of every
    # source. Confirm against ckanext.harvest.logic.action.get.
    harvest_object_ids = get_action('harvest_object_list')(
        context, {'source_id': source_id})
    show_harvest_object = get_action('harvest_object_show')
    packages = [
        show_harvest_object(context, {'id': object_id}).get('package')
        for object_id in harvest_object_ids
    ]

    source = get_action('harvest_source_show')(context, {'id': source_id})
    report = get_action('harvest_job_report')(context, {'id': last_job['id']})

    # A report without one of the error sections must not break the mail.
    object_errors = report.get('object_errors') or {}
    gather_errors = report.get('gather_errors') or []

    # The cap of 20 applies to harvest objects (respectively gather errors),
    # not to the number of individual messages.
    obj_errors = []
    for object_report in islice(object_errors.values(), 0, 20):
        obj_errors.extend(
            error['message'] for error in object_report['errors'])

    job_errors = [
        error['message'] for error in islice(gather_errors, 0, 20)
    ]

    if source.get('organization'):
        organization = source['organization']['name']
    else:
        organization = 'Not specified'

    harvest_configuration = source.get('config')
    if harvest_configuration in [None, '', '{}']:
        harvest_configuration = 'Not specified'

    return {
        'organization': organization,
        'site_title': config.get('ckan.site_title'),
        'site_url': config.get('ckan.site_url'),
        'job_url': toolkit.url_for(
            'harvest_job_show',
            source=source['id'],
            id=last_job['id']),
        'harvest_source_title': source['title'],
        'harvest_configuration': harvest_configuration,
        'job_finished': last_job['finished'],
        'job_id': last_job['id'],
        'job_created': last_job['created'],
        'records_in_error': str(stats.get('errored', 0)),
        'records_added': str(stats.get('added', 0)),
        'records_deleted': str(stats.get('deleted', 0)),
        'records_updated': str(stats.get('updated', 0)),
        'packages': packages,
        'error_summary_title': toolkit._('Error Summary'),
        'obj_errors_title': toolkit._('Document Error'),
        'job_errors_title': toolkit._('Job Errors'),
        'obj_errors': obj_errors,
        'job_errors': job_errors,
        'errors': job_errors + obj_errors,
    }


def prepare_summary_mail(context, source_id, status):
    """Build the subject and body of the per-job summary notification.

    :param context: CKAN action context.
    :param source_id: id of the harvest source the job belongs to.
    :param status: dict with a ``last_job`` entry (see
        ``get_mail_extra_vars``).
    :returns: ``(subject, body)`` tuple; the subject states whether the
        job finished with or without errored records.
    """
    extra_vars = get_mail_extra_vars(context, source_id, status)
    body = render_jinja2('emails/summary_email.txt', extra_vars)

    errored = status['last_job']['stats'].get('errored', 0)
    # Compared as a string so that both 0 and '0' count as "no errors".
    if str(errored) == '0':
        subject_template = (
            '{} - Harvesting Job Successful - Summary Notification')
    else:
        subject_template = (
            '{} - Harvesting Job with Errors - Summary Notification')
    subject = subject_template.format(config.get('ckan.site_title'))

    return subject, body
def prepare_error_mail(context, source_id, status):
    """Build the subject and body of the failure notification.

    :param context: CKAN action context.
    :param source_id: id of the harvest source the job belongs to.
    :param status: dict with a ``last_job`` entry, forwarded to
        ``get_mail_extra_vars``.
    :returns: ``(subject, body)`` tuple.
    """
    extra_vars = get_mail_extra_vars(context, source_id, status)
    body = render_jinja2('emails/error_email.txt', extra_vars)
    subject = '{} - Harvesting Job - Error Notification'.format(
        config.get('ckan.site_title'))

    return subject, body


def send_mail(context, source_id, subject, body):
    """Send a harvest notification to sysadmins and organization admins.

    Recipients are all sysadmin users plus, when the harvest source belongs
    to an organization, that organization's members with the ``admin``
    capacity. Users without an email address are skipped and each address
    is mailed only once.

    :param context: CKAN action context; ``context['model']`` is used to
        query the sysadmin users.
    :param source_id: id of the harvest source the mail is about.
    :param subject: mail subject.
    :param body: mail body.
    :raises Exception: any error other than ``mailer.MailerException`` is
        logged and re-raised.
    """
    source = get_action('harvest_source_show')(context, {'id': source_id})

    recipients = []
    seen_emails = set()

    def add_recipient(name, email):
        # Skip users without an address and anyone already queued, e.g. a
        # sysadmin who is also an administrator of the organization.
        if not email or email in seen_emails:
            return
        seen_emails.add(email)
        recipients.append({'name': name, 'email': email})

    # gather sysadmins
    model = context['model']
    sysadmins = model.Session.query(model.User).filter(
        model.User.sysadmin == True  # noqa: E712
    ).all()
    for sysadmin in sysadmins:
        add_recipient(sysadmin.name, sysadmin.email)

    # gather organization-admins
    if source.get('organization'):
        members = get_action('member_list')(context, {
            'id': source['organization']['id'],
            'object_type': 'user',
            'capacity': 'admin'
        })
        for member in members:
            # member[0] is used as the user id, as in the original lookup.
            member_details = get_action('user_show')(
                context, {'id': member[0]})
            add_recipient(member_details['name'], member_details['email'])

    for recipient in recipients:
        try:
            mailer.mail_recipient(
                recipient_name=recipient['name'],
                recipient_email=recipient['email'],
                subject=subject,
                body=body)
        except mailer.MailerException:
            # Best effort: a delivery failure for one recipient must not
            # stop the remaining notifications.
            log.error(
                'Sending Harvest-Notification-Mail failed. Message: %s',
                body)
        except Exception as e:
            log.error(e)
            raise
+ +Harvest Source: {{ harvest_source_title }} +Harvest Configuration: {{ harvest_configuration | safe }} + +Organization: {{ organization }} + +Harvest Job Id: {{ job_id }} +Created: {{ job_created }} +Finished: {{ job_finished }} + +Records in Error: {{ records_in_error }} +Records Added: {{ records_added }} +Records Updated: {{ records_updated }} +Records Deleted: {{ records_deleted }} + +{{ error_summary_title }} + - {{ errors|length }} errors +{{ job_errors_title }}: {{ job_errors|length }} +{% for error in job_errors %} + - {{ error }} {% endfor %} + +{{ obj_errors_title }}: {{ obj_errors|length }} +{% for error in obj_errors %} + - {{ error }} {% endfor %} + +Total packages: {{ packages|length }} +{% for package in packages %} + - {{ package }}{% endfor %} + +-- +You are receiving this email because you are currently set-up as Administrator for {{ site_url }}. +Please do not reply to this email as it was sent from a non-monitored address. diff --git a/ckanext/harvest/templates/emails/summary_email.txt b/ckanext/harvest/templates/emails/summary_email.txt new file mode 100644 index 000000000..7dfdaf66f --- /dev/null +++ b/ckanext/harvest/templates/emails/summary_email.txt @@ -0,0 +1,33 @@ +This is a summary of the latest harvest job ({{ job_url }}) set-up in {{ site_url }}. 
+ +Harvest Source: {{ harvest_source_title }} +Harvest Configuration: {{ harvest_configuration | safe }} + +Organization: {{ organization }} + +Harvest Job Id: {{ job_id }} +Created: {{ job_created }} +Finished: {{ job_finished }} + +Records in Error: {{ records_in_error }} +Records Added: {{ records_added }} +Records Updated: {{ records_updated }} +Records Deleted: {{ records_deleted }} + +{{ error_summary_title }} + - {{ errors|length }} errors +{{ job_errors_title }}: {{ job_errors|length }} +{% for error in job_errors %} + - {{ error }} {% endfor %} + +{{ obj_errors_title }}: {{ obj_errors|length }} +{% for error in obj_errors %} + - {{ error }} {% endfor %} + +Total packages: {{ packages|length }} +{% for package in packages %} + - {{ package }}{% endfor %} + +-- +You are receiving this email because you are currently set-up as Administrator for {{ site_url }}. +Please do not reply to this email as it was sent from a non-monitored address.