Skip to content

Commit

Permalink
Merge pull request #5 from nueces/master
Browse files Browse the repository at this point in the history
working branch.
  • Loading branch information
hvelarde committed Apr 20, 2012
2 parents ddb0610 + beb0c47 commit d23f569
Show file tree
Hide file tree
Showing 6 changed files with 105 additions and 40 deletions.
5 changes: 4 additions & 1 deletion buildout.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,12 @@ environment-vars = zope_i18n_compile_mo_files true
eggs =
Plone
Pillow
plone.reload
sauna.reload
transmogrify.nitf

zope-conf-additional =
%import sauna.reload

[i18ndude]
unzip = true
recipe = zc.recipe.egg
Expand Down
2 changes: 2 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,11 @@
zip_safe=False,
install_requires=[
'setuptools',
'isodate',
'collective.nitf',
'collective.transmogrifier',
'plone.app.transmogrifier',
'transmogrify.dexterity',
],
extras_require={
'test': ['plone.app.testing'],
Expand Down
16 changes: 16 additions & 0 deletions src/transmogrify/nitf/configure.zcml
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
<configure
xmlns="http://namespaces.zope.org/zope"
xmlns:i18n="http://namespaces.zope.org/i18n"
xmlns:grok="http://namespaces.zope.org/grok"
xmlns:transmogrifier="http://namespaces.plone.org/transmogrifier"
i18n_domain="transmogrify.nitf">

<includeDependencies package="." />
<grok:grok package="." />

<transmogrifier:registerConfig
name="nitfmigrator"
Expand All @@ -20,6 +22,14 @@
configuration="xmlimport.cfg"
/>


<!-- common utilities -->
<utility
component="transmogrify.nitf.import.DirectorySource"
name="transmogrify.nitf.import.sourcedirectory"
/>

<!-- migrator utilities -->
<utility
component="transmogrify.nitf.migrator.NewsItemSource"
name="transmogrify.nitf.migrator.newsitemsource"
Expand All @@ -45,4 +55,10 @@
name="transmogrify.nitf.migrator.pprinter"
/>

<!-- xml import utilities -->
<utility
component="transmogrify.nitf.xmlsource.XMLSource"
name="transmogrify.nitf.xmlsource.xmlprocessor"
/>

</configure>
3 changes: 2 additions & 1 deletion src/transmogrify/nitf/import.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def __init__(self, transmogrifier, name, options, previous):
"""
self.previous = previous
self.directory = resolvePackageReferenceOrFile(options['directory'])
self.suffix = ".{0}".format(options['suffix'].split())
self.suffix = ".{0}".format(options['suffix'].strip())

def __iter__(self):
for item in self.previous:
Expand All @@ -34,4 +34,5 @@ def __iter__(self):
if filename.endswith(self.suffix):
filepath = os.path.join(self.directory, filename)
with open(filepath, 'r') as item:

yield item.read()
42 changes: 36 additions & 6 deletions src/transmogrify/nitf/xmlimport.cfg
Original file line number Diff line number Diff line change
@@ -1,18 +1,48 @@
[transmogrifier]
pipeline =
sourcedirectory
xmlprocesor
xmlprocessor
path-update
logger
folderarchive
constructor
schemaupdater
state-inserter
workflowupdater

[sourcedirectory]
blueprint = collective.nitf.import.sourcedirectory
directory = collective.nitf:data
blueprint = transmogrify.nitf.import.sourcedirectory
directory = transmogrify.nitf:data
suffix = xml

[xmlprocessor]
blueprint = collective.transmogrifier.constructor
blueprint = transmogrify.nitf.xmlsource.xmlprocessor
datestorage = True

[folders]
[path-update]
# Prefix the item path with a date-based archive folder: /articulos/YYYY/MM/DD/<id>.
# Note the lowercase %m (zero-padded month); uppercase %M is the *minute* and
# would file every item under a bogus "month" folder.
blueprint = collective.transmogrifier.sections.inserter
key = string:_path
value = python:"/articulos/{0}/{1}".format(item['effective'].strftime("%Y/%m/%d"), item['_path'])

[folderarchive]
blueprint = collective.transmogrifier.sections.folders

[constructor]
blueprint = collective.transmogrifier.constructor
blueprint = collective.transmogrifier.sections.constructor

[schemaupdater]
blueprint = transmogrify.dexterity.schemaupdater

[state-inserter]
blueprint = collective.transmogrifier.sections.inserter
key = string:_transitions
value = string:publish

[workflowupdater]
blueprint = plone.app.transmogrifier.workflowupdater

[logger]
blueprint = collective.transmogrifier.sections.logger
name = logger
level = INFO
key = _path
77 changes: 45 additions & 32 deletions src/transmogrify/nitf/xmlsource.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# -*- coding: utf-8 -*-

from isodate import parse_datetime
import xml.etree.ElementTree as etree

from zope.interface import classProvides, implements
Expand All @@ -25,16 +25,6 @@ def get_text(dom, subelemet, attribute=None):
return ''


def get_date_path(dom, subelemet, attribute):
    """ Return a path 'YYYY/MM/DD' based on a date value normalized into
    ISO-8601.

    The date string is read with ``get_text(dom, subelemet, attribute)``.
    Both the basic (``YYYYMMDD...``) and the extended (``YYYY-MM-DD...``)
    calendar-date formats are accepted; anything after the date part (time,
    timezone offset) is ignored.

    Note: when the date is missing or too short, the corresponding path
    segments are left empty (e.g. ``'//'`` for an empty value), exactly as
    the plain slicing produces them.
    """
    text = get_text(dom, subelemet, attribute)
    # Dropping the hyphens turns the extended format into the basic one.
    # A basic-format value has no hyphen within its first eight characters,
    # so its result is unchanged.
    compact = text.replace('-', '')
    # We only need the YYYYMMDD part from the string
    return "/".join([compact[:4], compact[4:6], compact[6:8]])


class XMLSource(object):
""" Process an string containing a xml representation of a nitf object.
"""
Expand All @@ -46,48 +36,71 @@ def __init__(self, transmogrifier, name, options, previous):

def __iter__(self):
for data in self.previous:
item = {'id': '',
'path': '',
'title': '',
'subtitle': '',
'description': '',
'byline': '',
'text': '',
'genre': '',
'section': '',
'urgency': '',
'location': '',
'media': {'image': [],
'video': []}
item = {'_path': None,
'_type': 'collective.nitf.content',
# plone.app.dexterity.behaviours.metadata.IBasic
'title': None, 'description': None,
# plone.app.dexterity.behaviours.metadata.ICategorization
'subjects': [], 'language': '',
# plone.app.dexterity.behaviours.metadata.IPublication
'effective': None, 'expires': None,
# plone.app.dexterity.behaviours.metadata.IOwnership
'creators': [], 'contributors': [], 'rights': None,
# TODO: How does the standard manage references and related items?
# plone.app.referenceablebehavior.referenceable.IReferenceable
#'_plone.uuid': '',
# plone.app.relationfield.behavior.IRelatedItems
# 'relatedItems': (),
# collective.nitf.content.INITF
'subtitle': '', 'byline': '', 'text': '', 'genre': '',
'section': '', 'urgency': '', 'location': '',
# objects that should be created inside of the current
# NITF object.
'_media': {'images': [],
'videos': []}
}

dom = etree.fromstring(data)
head = dom.find('head')
body = dom.find('body')

item['id'] = get_text(head, 'docdata/doc-id', 'id-string').lower()
item['path'] = get_date_path(head, 'docdata/date.release', 'norm')
item['_path'] = get_text(head, 'docdata/doc-id', 'id-string').lower()
item['title'] = get_text(head, 'title')
item['subjects'] = [k.get('key') for k in \
list(head.find('docdata/key-list')) if k.get('key')]
#item['language']
sdate = get_text(head, 'docdata/date.release', 'norm')
if sdate:
item['effective'] = parse_datetime(sdate)

sdate = get_text(head, 'docdata/date.expire', 'norm')
if sdate:
item['expires'] = parse_datetime(sdate)

#sdate = get_text(head, 'docdata/date.issue', 'norm')
#if sdate:
# item['issue'] = parse_datetime(sdate)

item['genre'] = get_text(head, 'tobject/tobject.property',
'tobject.property.type')
'tobject.property.type')
item['section'] = get_text(head, 'pubdata', 'position.section')
item['urgency'] = get_text(head, 'docdata/urgency', 'ed-urg')

item['description'] = get_text(body, 'body.head/abstract/p')
item['location'] = get_text(body, 'body.head/dateline/location')
item['subtitle'] = get_text(body, 'body.head/hedline/hl2')
item['description'] = get_text(body, 'body.head/abstract')
item['byline'] = get_text(body, 'body.head/byline/person')

for elem in list(body.find('body.content')):
if elem.tag == 'media' and elem.get('media-type') == 'image':
image = dict(elem.find('media-reference'))
image = elem.find('media-reference').attrib
image['media-caption'] = get_text(elem, 'media-caption')
item['media']['image'].append(image)
item['_media']['images'].append(image)

elif elem.tag == 'media' and elem.get('media-type') == 'video':
video = dict(elem.find('media-reference'))
video = elem.find('media-reference').attrib
video['media-caption'] = get_text(elem, 'media-caption')
item['media']['video'].append(video)
item['_media']['videos'].append(video)

else: # other tag are considered part of the body text and
# should be preserved.
Expand Down

0 comments on commit d23f569

Please sign in to comment.