Skip to content

Commit

Permalink
Create a version number for the crawler (#30)
Browse files Browse the repository at this point in the history
It looks for a file with a .version extension, created by the CI, next to the
source file of the spider class; during development, when that file does not
exist, it uses the spider name + "/dev".

* Created get_version method
* Added a spider field to carry spider related data
  • Loading branch information
adilsoncarvalho authored Nov 28, 2016
1 parent 81d876e commit d09d52a
Show file tree
Hide file tree
Showing 4 changed files with 30 additions and 0 deletions.
6 changes: 6 additions & 0 deletions nfcrawler/nfcrawler/items.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,11 @@
from scrapy.loader.processors import MapCompose, Identity
from nfcrawler.utils import to_int, to_decimal, to_datetime, string_cleaner

# ---

# Item holding spider-related data; at present it declares a single field.
class SpiderItem(Item):
# Version value of the spider, declared as a plain Field with no options.
version = Field()

# ---
class NFeEmitenteItem(Item):
razao_social = Field()
Expand Down Expand Up @@ -210,6 +215,7 @@ class ProdutoItem(Item):
# ---

class DocumentItem(Item):
spider = Field(serializer=SpiderItem)
nfe = Field(serializer=NFeItem)
emitente = Field(serializer=EmitenteItem)
destinatario = Field(serializer=DestinatarioItem)
Expand Down
3 changes: 3 additions & 0 deletions nfcrawler/nfcrawler/loaders.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@ class DocumentLoader(ItemLoader):
# Loader derived from CommonLoader whose default item class is NFeItem.
class NFeLoader(CommonLoader):
default_item_class = NFeItem

# Loader derived from CommonLoader whose default item class is SpiderItem.
class SpiderLoader(CommonLoader):
default_item_class = SpiderItem

class NFeEmitenteLoader(CommonLoader):
default_item_class = NFeEmitenteItem

Expand Down
7 changes: 7 additions & 0 deletions nfcrawler/nfcrawler/spiders/pr_nfce.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import re
from nfcrawler.loaders import *
from nfcrawler.items import *
from nfcrawler.utils import get_version

# Extracted documents at https://github.com/adilsoncarvalho/barateza-nfcrawler/wiki/NFCe-Paraná

Expand Down Expand Up @@ -36,6 +37,7 @@ def parse(self, response):

def parse_detailed_page(self, response):
loader = DocumentLoader(response=response)
loader.add_value('spider', self.get_spider(response))
loader.add_value('nfe', self.get_nfe(response))
loader.add_value('emitente', self.get_emitente(response))
loader.add_value('destinatario', self.get_destinatario(response))
Expand All @@ -45,6 +47,11 @@ def parse_detailed_page(self, response):
loader.add_value('produtos', self.get_produtos(response))
return loader.load_item()

def get_spider(self, response):
    """Build the spider item for a response.

    Creates a ``SpiderLoader`` bound to ``response``, stores the value
    returned by ``get_version(self)`` under the ``'version'`` field and
    returns the loaded item.
    """
    spider_loader = SpiderLoader(response=response)
    spider_version = get_version(self)
    spider_loader.add_value('version', spider_version)
    return spider_loader.load_item()

def get_nfe(self, response):
loader = NFeLoader(response=response)
loader.add_css('chave_acesso', 'div.GeralXslt fieldset table.box tr:first-of-type td span')
Expand Down
14 changes: 14 additions & 0 deletions nfcrawler/nfcrawler/utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
# -*- coding: utf-8 -*-
import os
import re
import subprocess
import inspect
from unidecode import unidecode
from w3lib.html import remove_tags

Expand Down Expand Up @@ -44,3 +47,14 @@ def save_to_file(self, file_name, content):

with open(file_name, 'w') as file:
file.write(content)

def get_version(self):
    """Return the version string of ``self`` as ``<name>/<version>``.

    The version is read from the file whose path is given by
    ``get_version_file(self)``; its content is used with surrounding
    whitespace stripped. When that file does not exist, the version
    falls back to ``'dev'``.

    ``self`` must expose a ``name`` attribute, which is used as the prefix
    of the returned string.
    """
    version_filename = get_version_file(self)
    if os.path.exists(version_filename):
        # Use a context manager so the file handle is closed deterministically
        # instead of being left for the garbage collector.
        with open(version_filename, 'r') as version_file:
            version = version_file.read().strip()
    else:
        version = 'dev'
    return self.name + '/' + version

def get_version_file(self):
    """Return the path of the ``.version`` file associated with ``self``.

    The path is the file reported by ``inspect.getfile`` for the class of
    ``self``, with its extension replaced by ``.version``.
    """
    class_source = inspect.getfile(self.__class__)
    # splitext drops only the final extension (e.g. ".py").
    base_path, _extension = os.path.splitext(class_source)
    return base_path + '.version'

0 comments on commit d09d52a

Please sign in to comment.