Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

working branch. #5

Merged
merged 9 commits into from
Apr 20, 2012
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion buildout.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,12 @@ environment-vars = zope_i18n_compile_mo_files true
eggs =
Plone
Pillow
plone.reload
sauna.reload
transmogrify.nitf

zope-conf-additional =
%import sauna.reload

[i18ndude]
unzip = true
recipe = zc.recipe.egg
Expand Down
2 changes: 2 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,11 @@
zip_safe=False,
install_requires=[
'setuptools',
'isodate',
'collective.nitf',
'collective.transmogrifier',
'plone.app.transmogrifier',
'transmogrify.dexterity',
],
extras_require={
'test': ['plone.app.testing'],
Expand Down
16 changes: 16 additions & 0 deletions src/transmogrify/nitf/configure.zcml
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
<configure
xmlns="http://namespaces.zope.org/zope"
xmlns:i18n="http://namespaces.zope.org/i18n"
xmlns:grok="http://namespaces.zope.org/grok"
xmlns:transmogrifier="http://namespaces.plone.org/transmogrifier"
i18n_domain="transmogrify.nitf">

<includeDependencies package="." />
<grok:grok package="." />

<transmogrifier:registerConfig
name="nitfmigrator"
Expand All @@ -20,6 +22,14 @@
configuration="xmlimport.cfg"
/>


<!-- common utilities -->
<utility
component="transmogrify.nitf.import.DirectorySource"
name="transmogrify.nitf.import.sourcedirectory"
/>

<!-- migrator utilities -->
<utility
component="transmogrify.nitf.migrator.NewsItemSource"
name="transmogrify.nitf.migrator.newsitemsource"
Expand All @@ -45,4 +55,10 @@
name="transmogrify.nitf.migrator.pprinter"
/>

<!-- xml import utilities -->
<utility
component="transmogrify.nitf.xmlsource.XMLSource"
name="transmogrify.nitf.xmlsource.xmlprocessor"
/>

</configure>
3 changes: 2 additions & 1 deletion src/transmogrify/nitf/import.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def __init__(self, transmogrifier, name, options, previous):
"""
self.previous = previous
self.directory = resolvePackageReferenceOrFile(options['directory'])
self.suffix = ".{0}".format(options['suffix'].split())
self.suffix = ".{0}".format(options['suffix'].strip())

def __iter__(self):
for item in self.previous:
Expand All @@ -34,4 +34,5 @@ def __iter__(self):
if filename.endswith(self.suffix):
filepath = os.path.join(self.directory, filename)
with open(filepath, 'r') as item:

yield item.read()
42 changes: 36 additions & 6 deletions src/transmogrify/nitf/xmlimport.cfg
Original file line number Diff line number Diff line change
@@ -1,18 +1,48 @@
[transmogrifier]
pipeline =
sourcedirectory
xmlprocesor
xmlprocessor
path-update
logger
folderarchive
constructor
schemaupdater
state-inserter
workflowupdater

[sourcedirectory]
blueprint = collective.nitf.import.sourcedirectory
directory = collective.nitf:data
blueprint = transmogrify.nitf.import.sourcedirectory
directory = transmogrify.nitf:data
suffix = xml

[xmlprocessor]
blueprint = collective.transmogrifier.constructor
blueprint = transmogrify.nitf.xmlsource.xmlprocessor
datestorage = True

[folders]
[path-update]
# Prefix each item's path with its effective date: /articulos/YYYY/MM/DD/<path>.
# strftime's lowercase 'm' directive is the month; uppercase 'M' is the minute,
# which would scatter items across bogus "month" folders.
blueprint = collective.transmogrifier.sections.inserter
key = string:_path
value = python:"/articulos/{0}/{1}".format(item['effective'].strftime("%Y/%m/%d"), item['_path'])

[folderarchive]
blueprint = collective.transmogrifier.sections.folders

[constructor]
blueprint = collective.transmogrifier.constructor
blueprint = collective.transmogrifier.sections.constructor

[schemaupdater]
blueprint = transmogrify.dexterity.schemaupdater

[state-inserter]
blueprint = collective.transmogrifier.sections.inserter
key = string:_transitions
value = string:publish

[workflowupdater]
blueprint = plone.app.transmogrifier.workflowupdater

[logger]
blueprint = collective.transmogrifier.sections.logger
name = logger
level = INFO
key = _path
77 changes: 45 additions & 32 deletions src/transmogrify/nitf/xmlsource.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# -*- coding: utf-8 -*-

from isodate import parse_datetime
import xml.etree.ElementTree as etree

from zope.interface import classProvides, implements
Expand All @@ -25,16 +25,6 @@ def get_text(dom, subelemet, attribute=None):
return ''


def get_date_path(dom, subelemet, attribute):
    """Build a 'YYYY/MM/DD' path from an ISO-8601 basic-format date value.

    The raw value is fetched with ``get_text(dom, subelemet, attribute)``.
    Only its first eight characters (YYYYMMDD) are used, so the extended
    format (with dashes) is not supported.
    """
    stamp = get_text(dom, subelemet, attribute)
    # Slice the leading YYYYMMDD portion into its three components.
    year, month, day = stamp[:4], stamp[4:6], stamp[6:8]
    return "/".join((year, month, day))


class XMLSource(object):
""" Process an string containing a xml representation of a nitf object.
"""
Expand All @@ -46,48 +36,71 @@ def __init__(self, transmogrifier, name, options, previous):

def __iter__(self):
for data in self.previous:
item = {'id': '',
'path': '',
'title': '',
'subtitle': '',
'description': '',
'byline': '',
'text': '',
'genre': '',
'section': '',
'urgency': '',
'location': '',
'media': {'image': [],
'video': []}
item = {'_path': None,
'_type': 'collective.nitf.content',
# plone.app.dexterity.behaviours.metadata.IBasic
'title': None, 'description': None,
# plone.app.dexterity.behaviours.metadata.ICategorization
'subjects': [], 'language': '',
# plone.app.dexterity.behaviours.metadata.IPublication
'effective': None, 'expires': None,
# plone.app.dexterity.behaviours.metadata.IOwnership
'creators': [], 'contributors': [], 'rights': None,
# TODO: How does the standard manage references and related items?
# plone.app.referenceablebehavior.referenceable.IReferenceable
#'_plone.uuid': '',
# plone.app.relationfield.behavior.IRelatedItems
# 'relatedItems': (),
# collective.nitf.content.INITF
'subtitle': '', 'byline': '', 'text': '', 'genre': '',
'section': '', 'urgency': '', 'location': '',
# objects that should be created inside of the current
# NITF object.
'_media': {'images': [],
'videos': []}
}

dom = etree.fromstring(data)
head = dom.find('head')
body = dom.find('body')

item['id'] = get_text(head, 'docdata/doc-id', 'id-string').lower()
item['path'] = get_date_path(head, 'docdata/date.release', 'norm')
item['_path'] = get_text(head, 'docdata/doc-id', 'id-string').lower()
item['title'] = get_text(head, 'title')
item['subjects'] = [k.get('key') for k in \
list(head.find('docdata/key-list')) if k.get('key')]
#item['language']
sdate = get_text(head, 'docdata/date.release', 'norm')
if sdate:
item['effective'] = parse_datetime(sdate)

sdate = get_text(head, 'docdata/date.expire', 'norm')
if sdate:
item['expires'] = parse_datetime(sdate)

#sdate = get_text(head, 'docdata/date.issue', 'norm')
#if sdate:
# item['issue'] = parse_datetime(sdate)

item['genre'] = get_text(head, 'tobject/tobject.property',
'tobject.property.type')
'tobject.property.type')
item['section'] = get_text(head, 'pubdata', 'position.section')
item['urgency'] = get_text(head, 'docdata/urgency', 'ed-urg')

item['description'] = get_text(body, 'body.head/abstract/p')
item['location'] = get_text(body, 'body.head/dateline/location')
item['subtitle'] = get_text(body, 'body.head/hedline/hl2')
item['description'] = get_text(body, 'body.head/abstract')
item['byline'] = get_text(body, 'body.head/byline/person')

for elem in list(body.find('body.content')):
if elem.tag == 'media' and elem.get('media-type') == 'image':
image = dict(elem.find('media-reference'))
image = elem.find('media-reference').attrib
image['media-caption'] = get_text(elem, 'media-caption')
item['media']['image'].append(image)
item['_media']['images'].append(image)

elif elem.tag == 'media' and elem.get('media-type') == 'video':
video = dict(elem.find('media-reference'))
video = elem.find('media-reference').attrib
video['media-caption'] = get_text(elem, 'media-caption')
item['media']['video'].append(video)
item['_media']['videos'].append(video)

else: # other tag are considered part of the body text and
# should be preserved.
Expand Down