diff --git a/hepcrawl/spiders/desy_spider.py b/hepcrawl/spiders/desy_spider.py index 05b35282..f1d40f6f 100644 --- a/hepcrawl/spiders/desy_spider.py +++ b/hepcrawl/spiders/desy_spider.py @@ -109,7 +109,9 @@ def crawl_local_directory(self): for file_name in xml_file_names: file_path = os.path.join(self.source_folder, file_name) - self.log('Local: Try to crawl local file: {0}'.format(file_path)) + self.logger.info( + 'Local: Try to crawl local file: {0}'.format(file_path) + ) yield Request( 'file://{0}'.format(file_path), callback=self.parse, @@ -133,7 +135,7 @@ def crawl_ftp_directory(self): xml_remote_files_paths = self._filter_xml_files(remote_files_paths) for remote_file in xml_remote_files_paths: - self.log( + self.logger.info( 'Remote: Try to crawl file from FTP: {0}'.format(remote_file), ) remote_file = str(remote_file) @@ -158,7 +160,7 @@ def handle_package_ftp(self, response): response(hepcrawl.http.response.Response): response containing the information about the ftp file download. """ - self.log('Visited url {}'.format(response.url)) + self.logger.info('Visited url {}'.format(response.url)) file_path = response.body yield Request( 'file://{0}'.format(file_path), @@ -207,8 +209,8 @@ def parse(self, response): """Parse a ``Desy`` XML file into a :class:`hepcrawl.utils.ParsedItem`. """ - self.log('Got record from url/path: {0}'.format(response.url)) - self.log('FTP enabled: {0}'.format(self.ftp_enabled)) + self.logger.info('Got record from url/path: {0}'.format(response.url)) + self.logger.info('FTP enabled: {0}'.format(self.ftp_enabled)) ftp_params = None if self.ftp_enabled: @@ -225,12 +227,12 @@ def parse(self, response): url_schema = 'file' hostname = None - self.log('Getting marc xml records...') + self.logger.info('Getting marc xml records...') marcxml_records = self._get_marcxml_records(response.body) - self.log('Got %d marc xml records' % len(marcxml_records)) - self.log('Getting hep records...') + self.logger.info('Got %d marc xml records' % len(marcxml_records)) + self.logger.info('Getting hep records...') hep_records = self._hep_records_from_marcxml(marcxml_records) - self.log('Got %d hep records' % len(hep_records)) + self.logger.info('Got %d hep records' % len(hep_records)) for hep_record in hep_records: files_to_download = [ @@ -244,7 +246,7 @@ def parse(self, response): if self._has_to_be_downloaded(document['url']) ] - self.log( + self.logger.info( 'Got the following attached documents to download: %s' % files_to_download ) @@ -254,7 +256,7 @@ def parse(self, response): ftp_params=ftp_params, record_format='hep', ) - self.log('Got item: %s' % parsed_item) + self.logger.info('Got item: %s' % parsed_item) yield parsed_item diff --git a/hepcrawl/spiders/elsevier_spider.py b/hepcrawl/spiders/elsevier_spider.py index e2d4e919..583f996b 100644 --- a/hepcrawl/spiders/elsevier_spider.py +++ b/hepcrawl/spiders/elsevier_spider.py @@ -168,7 +168,7 @@ def handle_feed(self, response): def handle_package(self, response): """Handle the zip package and yield a request for every XML found.""" - self.log("Visited %s" % response.url) + self.logger.info("Visited %s" % response.url) filename = os.path.basename(response.url).rstrip(".zip") # TMP dir to extract zip packages: target_folder = mkdtemp(prefix="elsevier_" + filename + "_", dir="/tmp/") diff --git a/hepcrawl/spiders/pos_spider.py b/hepcrawl/spiders/pos_spider.py index 024eff6b..c8c67505 100644 --- a/hepcrawl/spiders/pos_spider.py +++ b/hepcrawl/spiders/pos_spider.py @@ -92,7 +92,7 @@ def start_requests(self): yield Request(self.source_file) def parse(self, response): - self.log('Got record from: {response.url}'.format(**vars())) + self.logger.info('Got record from: {response.url}'.format(**vars())) response.selector.remove_namespaces() record_xml_selectors = response.selector.xpath('.//record') @@ -125,7 +125,7 @@ def get_conference_paper_page_request(self, xml_selector, meta=None): ) def parse_conference_paper(self, response): - self.log( + self.logger.info( 'Parsing conference paper from: {response.url}'.format(**vars()) ) xml_record = response.meta.get('xml_record')