Merge pull request #3 from nueces/master

some bits for the xml importer
hvelarde · Apr 13, 2012 · ddb0610 · ddb0610
2 parents 0c10da7 + fbae0bc
commit ddb0610
Show file tree

Hide file tree

Showing 6 changed files with 199 additions and 13 deletions.
diff --git a/buildout.cfg b/buildout.cfg
@@ -1,7 +1,6 @@
 [buildout]
 extensions =
     buildout.dumppickedversions
-    buildout.threatlevel
 
 develop = .
 unzip = true
@@ -11,6 +10,9 @@ parts =
     omelette
     test
     zopepy
+    pylint
+    pyflakes
+    pep8
 
 extends =
     http://dist.plone.org/release/4.1.2/versions.cfg
@@ -55,3 +57,29 @@ recipe = zc.recipe.egg
 eggs = ${instance:eggs}
 interpreter = zopepy
 scripts = zopepy
+
+[pylint]
+recipe = zc.recipe.egg
+eggs =
+    pylint
+    ${instance:eggs}
+entry-points = pylint=pylint.lint:Run
+scripts = pylint
+arguments = [
+    '--output-format=colorized',
+    '--zope=y',
+    '--reports=no',
+# Suppress E0611 (no name in module), F0401 (unable to import) and W0232 (class has no __init__, e.g. interfaces)
+    '--disable=E0611,F0401,W0232',
+    ] + sys.argv[1:]
+
+[pyflakes]
+recipe = zc.recipe.egg
+scripts = pyflakes
+eggs = pyflakes
+#pyflakes reads sys.argv directly
+entry-points = pyflakes=pyflakes.scripts.pyflakes:main
+
+[pep8]
+recipe = zc.recipe.egg
+eggs = pep8
diff --git a/src/transmogrify/nitf/configure.zcml b/src/transmogrify/nitf/configure.zcml
@@ -13,29 +13,36 @@
      configuration="migrator.cfg"
      />
 
+  <transmogrifier:registerConfig
+     name="nitfxmlimport"
+     title="Import XML files into NITF objects"
+     description="Create NITF objects from XML files"
+     configuration="xmlimport.cfg"
+     />
+
   <utility
-     component="collective.nitf.migrator.NewsItemSource"
-     name="collective.nitf.migrator.newsitemsource"
+     component="transmogrify.nitf.migrator.NewsItemSource"
+     name="transmogrify.nitf.migrator.newsitemsource"
      />
 
   <utility
-     component="collective.nitf.migrator.SchemaUpdater"
-     name="collective.nitf.migrator.schemaupdater"
+     component="transmogrify.nitf.migrator.SchemaUpdater"
+     name="transmogrify.nitf.migrator.schemaupdater"
      />
 
   <utility
-     component="collective.nitf.migrator.ImageMigrator"
-     name="collective.nitf.migrator.imagemigrator"
+     component="transmogrify.nitf.migrator.ImageMigrator"
+     name="transmogrify.nitf.migrator.imagemigrator"
      />
 
   <utility
-     component="collective.nitf.migrator.ReplaceObject"
-     name="collective.nitf.migrator.replaceobject"
+     component="transmogrify.nitf.migrator.ReplaceObject"
+     name="transmogrify.nitf.migrator.replaceobject"
      />
 
   <utility
-     component="collective.nitf.migrator.PrettyPrinter"
-     name="collective.nitf.migrator.pprinter"
+     component="transmogrify.nitf.migrator.PrettyPrinter"
+     name="transmogrify.nitf.migrator.pprinter"
      />
 
 </configure>
diff --git a/src/transmogrify/nitf/import.py b/src/transmogrify/nitf/import.py
@@ -0,0 +1,37 @@
+# -*- coding: utf-8 -*-
+
+import os
+
+from zope.interface import classProvides, implements
+from collective.transmogrifier.interfaces import ISectionBlueprint
+from collective.transmogrifier.interfaces import ISection
+from collective.transmogrifier.utils import resolvePackageReferenceOrFile
+
+
+class DirectorySource(object):
+    """Source section that yields the content of the files in a directory.
+
+    Items coming from the previous section are passed through untouched
+    first; afterwards the whole content of every matching file is yielded
+    as a single string.
+    """
+    classProvides(ISectionBlueprint)
+    implements(ISection)
+
+    def __init__(self, transmogrifier, name, options, previous):
+        """ Takes two options:
+
+            directory: A full path to a directory or a relative path inside a
+                       package in the form collective.example:datadir.
+
+            suffix: The extension of the files that should be processed,
+                    with or without the leading dot ('xml' or '.xml').
+        """
+        self.previous = previous
+        self.directory = resolvePackageReferenceOrFile(options['directory'])
+        # strip(), not split(): split() returns a list, which would be
+        # rendered as ".['xml']" and never match any file name.
+        self.suffix = ".{0}".format(options['suffix'].strip().lstrip('.'))
+
+    def __iter__(self):
+        """ Yield the previous items, then the content of each file whose
+            name ends with the configured suffix.
+        """
+        for item in self.previous:
+            yield item
+
+        # os.listdir() gives the names in arbitrary order; sort them so
+        # every run processes the files in the same order.
+        for filename in sorted(os.listdir(self.directory)):
+            if not filename.endswith(self.suffix):
+                continue
+
+            filepath = os.path.join(self.directory, filename)
+            # A sub-directory may also carry the suffix; it cannot be read.
+            if not os.path.isfile(filepath):
+                continue
+
+            # Read everything and close the file before yielding, so it is
+            # not kept open while the rest of the pipeline runs.
+            with open(filepath, 'r') as source:
+                content = source.read()
+            yield content
diff --git a/src/transmogrify/nitf/migrator.py b/src/transmogrify/nitf/migrator.py
@@ -23,7 +23,7 @@
 from collective.transmogrifier.transmogrifier import Transmogrifier
 
 from collective.nitf.content import INITF
-from collective.nitf.content import kind_default_value
+from collective.nitf.content import genre_default_value
 from collective.nitf.content import section_default_value
 from collective.nitf.content import urgency_default_value
 
@@ -111,7 +111,7 @@ def __iter__(self):
             #obj.abstract = item['description']
             obj.byline = ''
             obj.text = RichTextValue(item['text'], 'text/html', 'text/x-html-safe')
-            obj.kind = kind_default_value(None)
+            obj.genre = genre_default_value(None)
             obj.section = section_default_value(None)
             obj.urgency = urgency_default_value(None)
 

diff --git a/src/transmogrify/nitf/xmlimport.cfg b/src/transmogrify/nitf/xmlimport.cfg
@@ -0,0 +1,18 @@
+# Pipeline that reads XML files from a directory and creates content
+# objects from them.  Every name listed under "pipeline" must match a
+# section defined below.
+[transmogrifier]
+pipeline =
+    sourcedirectory
+    xmlprocessor
+    constructor
+
+[sourcedirectory]
+blueprint = collective.nitf.import.sourcedirectory
+directory = collective.nitf:data
+# Extension of the files to read; the directory source section looks up
+# this option unconditionally.
+suffix = xml
+
+# NOTE(review): this section uses the same blueprint as [constructor];
+# presumably it should point to the XML source section instead -- confirm.
+[xmlprocessor]
+blueprint = collective.transmogrifier.constructor
+
+# NOTE(review): defined but not listed in the pipeline above.
+[folders]
+blueprint = collective.transmogrifier.sections.folders
+
+[constructor]
+blueprint = collective.transmogrifier.constructor
diff --git a/src/transmogrify/nitf/xmlsource.py b/src/transmogrify/nitf/xmlsource.py
@@ -0,0 +1,96 @@
+# -*- coding: utf-8 -*-
+
+import xml.etree.ElementTree as etree
+
+from zope.interface import classProvides, implements
+from collective.transmogrifier.interfaces import ISection
+from collective.transmogrifier.interfaces import ISectionBlueprint
+
+#from collective.nitf.content import genre_default_value
+#from collective.nitf.content import section_default_value
+#from collective.nitf.content import urgency_default_value
+
+
+def get_text(dom, subelemet, attribute=None):
+    """ Return the text of a sub-element, or the value of one of its
+        attributes.
+
+        dom: the element to search from; None is tolerated.
+        subelemet: path of the sub-element, as understood by dom.find().
+        attribute: when given, the value of this attribute is returned
+                   instead of the text of the element.
+
+        An empty string is returned when the element, its text or the
+        attribute is missing, so callers always get a string back.
+    """
+    if dom is None:
+        return ''
+
+    elem = dom.find(subelemet)
+    if elem is None:
+        return ''
+
+    if attribute is None:
+        # elem.text is None for empty elements such as <title/>.
+        return elem.text or ''
+
+    return elem.get(attribute, '')
+
+
+def get_date_path(dom, subelemet, attribute):
+    """ Return a path 'YYYY/MM/DD' built from a date attribute normalized
+        into ISO-8601.
+
+        Both the basic (YYYYMMDD...) and the extended (YYYY-MM-DD...)
+        formats are understood; only the date part of the value is used.
+        An empty string is returned when no complete date can be read,
+        instead of a meaningless path such as '//'.
+    """
+    text = get_text(dom, subelemet, attribute) or ''
+    # Drop the '-' separators of the extended format.  In the basic format
+    # a '-' can only show up after the date (UTC offset sign), so removing
+    # it does not disturb the first eight characters.
+    digits = text.strip().replace('-', '')[:8]
+    if len(digits) != 8 or not digits.isdigit():
+        return ''
+
+    return "/".join([digits[:4], digits[4:6], digits[6:8]])
+
+
+class XMLSource(object):
+    """ Process strings containing the XML representation of a NITF object
+        and turn each of them into a pipeline item (a dictionary).
+    """
+    classProvides(ISectionBlueprint)
+    implements(ISection)
+
+    def __init__(self, transmogrifier, name, options, previous):
+        self.previous = previous
+
+    def __iter__(self):
+        """ Parse every string coming from the previous section and yield
+            a dictionary with the values read from the document.
+        """
+        for data in self.previous:
+            dom = etree.fromstring(data)
+
+            # Fall back to empty elements so that a document lacking
+            # <head> or <body> gives empty values instead of an
+            # AttributeError on None.
+            head = dom.find('head')
+            if head is None:
+                head = etree.Element('head')
+            body = dom.find('body')
+            if body is None:
+                body = etree.Element('body')
+
+            media = {'image': [], 'video': []}
+            text = []
+
+            content = body.find('body.content')
+            children = [] if content is None else list(content)
+            for elem in children:
+                media_type = elem.get('media-type')
+                if elem.tag == 'media' and media_type in media:
+                    # The attributes of <media-reference> are wanted here;
+                    # dict(element) does not give them (it raises TypeError
+                    # as soon as the element has any attribute).
+                    reference = elem.find('media-reference')
+                    entry = {} if reference is None else dict(reference.attrib)
+                    entry['media-caption'] = get_text(elem, 'media-caption')
+                    media[media_type].append(entry)
+
+                else:   # other tags are considered part of the body text
+                        # and should be preserved.
+                    text.append(etree.tostring(elem))
+
+            yield {
+                'id': get_text(head, 'docdata/doc-id', 'id-string').lower(),
+                'path': get_date_path(head, 'docdata/date.release', 'norm'),
+                'title': get_text(head, 'title'),
+                'subtitle': get_text(body, 'body.head/hedline/hl2'),
+                'description': get_text(body, 'body.head/abstract'),
+                'byline': get_text(body, 'body.head/byline/person'),
+                'text': ''.join(text),
+                'genre': get_text(head, 'tobject/tobject.property',
+                                  'tobject.property.type'),
+                'section': get_text(head, 'pubdata', 'position.section'),
+                'urgency': get_text(head, 'docdata/urgency', 'ed-urg'),
+                'location': get_text(body, 'body.head/dateline/location'),
+                'media': media,
+            }