From 7d9fab528af4c2f93087da28cb52b5acdeb7dba7 Mon Sep 17 00:00:00 2001 From: nueces Date: Mon, 16 Apr 2012 09:54:46 -0300 Subject: [PATCH 1/9] replace plone.reload with sauna.reload --- buildout.cfg | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/buildout.cfg b/buildout.cfg index bf29c54..9ef4d00 100644 --- a/buildout.cfg +++ b/buildout.cfg @@ -33,9 +33,12 @@ environment-vars = zope_i18n_compile_mo_files true eggs = Plone Pillow - plone.reload + sauna.reload transmogrify.nitf +zope-conf-additional = + %import sauna.reload + [i18ndude] unzip = true recipe = zc.recipe.egg From 227b21446fb4ee89ea7b83bdc26c490ab068f8a0 Mon Sep 17 00:00:00 2001 From: nueces Date: Mon, 16 Apr 2012 09:55:47 -0300 Subject: [PATCH 2/9] Register utilities, fixed blueprint declarations. --- src/transmogrify/nitf/configure.zcml | 14 ++++++++++++++ src/transmogrify/nitf/xmlimport.cfg | 14 ++++++++------ 2 files changed, 22 insertions(+), 6 deletions(-) diff --git a/src/transmogrify/nitf/configure.zcml b/src/transmogrify/nitf/configure.zcml index 6f6c68f..0dcf92d 100644 --- a/src/transmogrify/nitf/configure.zcml +++ b/src/transmogrify/nitf/configure.zcml @@ -20,6 +20,14 @@ configuration="xmlimport.cfg" /> + + + + + + + + diff --git a/src/transmogrify/nitf/xmlimport.cfg b/src/transmogrify/nitf/xmlimport.cfg index d881ac5..0aaa963 100644 --- a/src/transmogrify/nitf/xmlimport.cfg +++ b/src/transmogrify/nitf/xmlimport.cfg @@ -1,18 +1,20 @@ [transmogrifier] pipeline = sourcedirectory - xmlprocesor + xmlprocessor + archivefolders constructor [sourcedirectory] -blueprint = collective.nitf.import.sourcedirectory -directory = collective.nitf:data +blueprint = transmogrify.nitf.import.sourcedirectory +directory = transmogrify.nitf:data +suffix = xml [xmlprocessor] -blueprint = collective.transmogrifier.constructor +blueprint = transmogrify.nitf.xmlsource.xmlprocessor -[folders] +[archivefolders] blueprint = collective.transmogrifier.sections.folders [constructor] -blueprint = 
collective.transmogrifier.constructor +blueprint = collective.transmogrifier.sections.constructor From 22d100988fecfe60dfef92f1541e5c9927b96f89 Mon Sep 17 00:00:00 2001 From: nueces Date: Mon, 16 Apr 2012 19:01:35 -0300 Subject: [PATCH 3/9] package grokked. --- src/transmogrify/nitf/configure.zcml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/transmogrify/nitf/configure.zcml b/src/transmogrify/nitf/configure.zcml index 0dcf92d..144e451 100644 --- a/src/transmogrify/nitf/configure.zcml +++ b/src/transmogrify/nitf/configure.zcml @@ -1,10 +1,12 @@ + Date: Mon, 16 Apr 2012 19:06:20 -0300 Subject: [PATCH 4/9] Fixing suffix attribute. Fixing media-caption attributes. --- src/transmogrify/nitf/import.py | 3 ++- src/transmogrify/nitf/xmlsource.py | 5 +++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/transmogrify/nitf/import.py b/src/transmogrify/nitf/import.py index 9b61fd5..6f941f7 100644 --- a/src/transmogrify/nitf/import.py +++ b/src/transmogrify/nitf/import.py @@ -24,7 +24,7 @@ def __init__(self, transmogrifier, name, options, previous): """ self.previous = previous self.directory = resolvePackageReferenceOrFile(options['directory']) - self.suffix = ".{0}".format(options['suffix'].split()) + self.suffix = ".{0}".format(options['suffix'].strip()) def __iter__(self): for item in self.previous: @@ -34,4 +34,5 @@ def __iter__(self): if filename.endswith(self.suffix): filepath = os.path.join(self.directory, filename) with open(filepath, 'r') as item: + yield item.read() diff --git a/src/transmogrify/nitf/xmlsource.py b/src/transmogrify/nitf/xmlsource.py index 0ae2dd8..b5320f4 100644 --- a/src/transmogrify/nitf/xmlsource.py +++ b/src/transmogrify/nitf/xmlsource.py @@ -47,6 +47,7 @@ def __init__(self, transmogrifier, name, options, previous): def __iter__(self): for data in self.previous: item = {'id': '', + '_type': 'collective.nitf.content', 'path': '', 'title': '', 'subtitle': '', @@ -80,12 +81,12 @@ def __iter__(self): for elem in 
list(body.find('body.content')): if elem.tag == 'media' and elem.get('media-type') == 'image': - image = dict(elem.find('media-reference')) + image = elem.find('media-reference').attrib image['media-caption'] = get_text(elem, 'media-caption') item['media']['image'].append(image) elif elem.tag == 'media' and elem.get('media-type') == 'video': - video = dict(elem.find('media-reference')) + video = elem.find('media-reference').attrib video['media-caption'] = get_text(elem, 'media-caption') item['media']['video'].append(video) From af2d00218517170de62db993de498b77353264d7 Mon Sep 17 00:00:00 2001 From: nueces Date: Tue, 17 Apr 2012 03:34:33 -0300 Subject: [PATCH 5/9] Renaming key. --- src/transmogrify/nitf/xmlsource.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/transmogrify/nitf/xmlsource.py b/src/transmogrify/nitf/xmlsource.py index b5320f4..3661a76 100644 --- a/src/transmogrify/nitf/xmlsource.py +++ b/src/transmogrify/nitf/xmlsource.py @@ -58,8 +58,8 @@ def __iter__(self): 'section': '', 'urgency': '', 'location': '', - 'media': {'image': [], - 'video': []} + 'media': {'images': [], + 'videos': []} } dom = etree.fromstring(data) @@ -83,12 +83,12 @@ def __iter__(self): if elem.tag == 'media' and elem.get('media-type') == 'image': image = elem.find('media-reference').attrib image['media-caption'] = get_text(elem, 'media-caption') - item['media']['image'].append(image) + item['media']['images'].append(image) elif elem.tag == 'media' and elem.get('media-type') == 'video': video = elem.find('media-reference').attrib video['media-caption'] = get_text(elem, 'media-caption') - item['media']['video'].append(video) + item['media']['videos'].append(video) else: # other tag are considered part of the body text and # should be preserved. From a27b0321c1add2f48420daacec30400a8cfafdb2 Mon Sep 17 00:00:00 2001 From: nueces Date: Thu, 19 Apr 2012 19:55:23 -0300 Subject: [PATCH 6/9] Added transmogrify.dexterity to use the schemaupdater. 
Added isodate to parse date strings. --- setup.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/setup.py b/setup.py index 7176884..a0e02d7 100644 --- a/setup.py +++ b/setup.py @@ -31,9 +31,11 @@ zip_safe=False, install_requires=[ 'setuptools', + 'isodate', 'collective.nitf', 'collective.transmogrifier', 'plone.app.transmogrifier', + 'transmogrify.dexterity', ], extras_require={ 'test': ['plone.app.testing'], From 3581204f515033214fbbefc0b8c7aa6ae55bfea2 Mon Sep 17 00:00:00 2001 From: nueces Date: Thu, 19 Apr 2012 19:58:38 -0300 Subject: [PATCH 7/9] Added isodate to parse string dates. Removed method to construct the path based on dates for archiving, it could be modified in the transmogrifier pipeline. Added new keywords into the item dict based on the default behaviours of the collective.nitf content type definition. --- src/transmogrify/nitf/xmlsource.py | 70 +++++++++++++++++------------- 1 file changed, 41 insertions(+), 29 deletions(-) diff --git a/src/transmogrify/nitf/xmlsource.py b/src/transmogrify/nitf/xmlsource.py index 3661a76..55e029d 100644 --- a/src/transmogrify/nitf/xmlsource.py +++ b/src/transmogrify/nitf/xmlsource.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- - +from isodate import parse_datetime import xml.etree.ElementTree as etree from zope.interface import classProvides, implements @@ -25,16 +25,6 @@ def get_text(dom, subelemet, attribute=None): return '' -def get_date_path(dom, subelemet, attribute): - """ Return a path 'YYYY/MM/DD' based on a date value normalized into - ISO-8601 - Note: Only work with the basic format. - """ - text = get_text(dom, subelemet, attribute) - # We only need the YYYYMMDD part from the string - return "/".join([text[:4], text[4:6], text[6:8]]) - - class XMLSource(object): """ Process an string containing a xml representation of a nitf object. 
""" @@ -46,19 +36,27 @@ def __init__(self, transmogrifier, name, options, previous): def __iter__(self): for data in self.previous: - item = {'id': '', + item = {'_path': None, '_type': 'collective.nitf.content', - 'path': '', - 'title': '', - 'subtitle': '', - 'description': '', - 'byline': '', - 'text': '', - 'genre': '', - 'section': '', - 'urgency': '', - 'location': '', - 'media': {'images': [], + # plone.app.dexterity.behaviours.metadata.IBasic + 'title': None, 'description': None, + # plone.app.dexterity.behaviours.metadata.ICategorization + 'subject': [], 'language': '', + # plone.app.dexterity.behaviours.metadata.IPublication + 'effective': None, 'expires': None, + # plone.app.dexterity.behaviours.metadata.IOwnership + 'creators': [], 'contributors': [], 'rights': None, + # TODO: How the standar manage refenreces and related items. + # plone.app.referenceablebehavior.referenceable.IReferenceable + #'_plone.uuid': '', + # plone.app.relationfield.behavior.IRelatedItems + # 'relatedItems': (), + # collective.nitf.content.INITF + 'subtitle': '', 'byline': '', 'text': '', 'genre': '', + 'section': '', 'urgency': '', 'location': '', + # objects that should be created inside of the current + # NITF object. 
+ '_media': {'images': [], 'videos': []} } @@ -66,29 +64,43 @@ def __iter__(self): head = dom.find('head') body = dom.find('body') - item['id'] = get_text(head, 'docdata/doc-id', 'id-string').lower() - item['path'] = get_date_path(head, 'docdata/date.release', 'norm') + item['_path'] = get_text(head, 'docdata/doc-id', 'id-string').lower() item['title'] = get_text(head, 'title') + item['subject'] = [k.get('key') for k in \ + list(head.find('docdata/key-list')) if k.get('key')] + #item['language'] + sdate = get_text(head, 'docdata/date.release', 'norm') + if sdate: + item['effective'] = parse_datetime(sdate) + + sdate = get_text(head, 'docdata/date.expire', 'norm') + if sdate: + item['expires'] = parse_datetime(sdate) + + #sdate = get_text(head, 'docdata/date.issue', 'norm') + #if sdate: + # item['issue'] = parse_datetime(sdate) + item['genre'] = get_text(head, 'tobject/tobject.property', - 'tobject.property.type') + 'tobject.property.type') item['section'] = get_text(head, 'pubdata', 'position.section') item['urgency'] = get_text(head, 'docdata/urgency', 'ed-urg') + item['description'] = get_text(body, 'body.head/abstract/p') item['location'] = get_text(body, 'body.head/dateline/location') item['subtitle'] = get_text(body, 'body.head/hedline/hl2') - item['description'] = get_text(body, 'body.head/abstract') item['byline'] = get_text(body, 'body.head/byline/person') for elem in list(body.find('body.content')): if elem.tag == 'media' and elem.get('media-type') == 'image': image = elem.find('media-reference').attrib image['media-caption'] = get_text(elem, 'media-caption') - item['media']['images'].append(image) + item['_media']['images'].append(image) elif elem.tag == 'media' and elem.get('media-type') == 'video': video = elem.find('media-reference').attrib video['media-caption'] = get_text(elem, 'media-caption') - item['media']['videos'].append(video) + item['_media']['videos'].append(video) else: # other tag are considered part of the body text and # should be 
preserved. From 4b326726fbf650ee8c122b8907b3020a9fcd82ec Mon Sep 17 00:00:00 2001 From: nueces Date: Thu, 19 Apr 2012 20:05:05 -0300 Subject: [PATCH 8/9] Added section to: - generate the object path based on dates. - publish the object. - update the dexterity schema. --- src/transmogrify/nitf/xmlimport.cfg | 32 +++++++++++++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) diff --git a/src/transmogrify/nitf/xmlimport.cfg b/src/transmogrify/nitf/xmlimport.cfg index 0aaa963..be32ac1 100644 --- a/src/transmogrify/nitf/xmlimport.cfg +++ b/src/transmogrify/nitf/xmlimport.cfg @@ -2,8 +2,13 @@ pipeline = sourcedirectory xmlprocessor - archivefolders + path-update + logger + folderarchive constructor + schemaupdater + state-inserter + workflowupdater [sourcedirectory] blueprint = transmogrify.nitf.import.sourcedirectory @@ -12,9 +17,32 @@ suffix = xml [xmlprocessor] blueprint = transmogrify.nitf.xmlsource.xmlprocessor +datestorage = True -[archivefolders] +[path-update] +blueprint = collective.transmogrifier.sections.inserter +key = string:_path +value = python:"/articulos/{0}/{1}".format(item['effective'].strftime("%Y/%m/%d"), item['_path']) + +[folderarchive] blueprint = collective.transmogrifier.sections.folders [constructor] blueprint = collective.transmogrifier.sections.constructor + +[schemaupdater] +blueprint = transmogrify.dexterity.schemaupdater + +[state-inserter] +blueprint = collective.transmogrifier.sections.inserter +key = string:_transitions +value = string:publish + +[workflowupdater] +blueprint = plone.app.transmogrifier.workflowupdater + +[logger] +blueprint = collective.transmogrifier.sections.logger +name = logger +level = INFO +key = _path From beb0c47c03b554644b4a31b45c3ee02975098f81 Mon Sep 17 00:00:00 2001 From: nueces Date: Fri, 20 Apr 2012 03:32:09 -0300 Subject: [PATCH 9/9] Fixed typo in the subjects field. 
--- src/transmogrify/nitf/xmlsource.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/transmogrify/nitf/xmlsource.py b/src/transmogrify/nitf/xmlsource.py index 55e029d..15fc881 100644 --- a/src/transmogrify/nitf/xmlsource.py +++ b/src/transmogrify/nitf/xmlsource.py @@ -41,7 +41,7 @@ def __iter__(self): # plone.app.dexterity.behaviours.metadata.IBasic 'title': None, 'description': None, # plone.app.dexterity.behaviours.metadata.ICategorization - 'subject': [], 'language': '', + 'subjects': [], 'language': '', # plone.app.dexterity.behaviours.metadata.IPublication 'effective': None, 'expires': None, # plone.app.dexterity.behaviours.metadata.IOwnership @@ -66,7 +66,7 @@ def __iter__(self): item['_path'] = get_text(head, 'docdata/doc-id', 'id-string').lower() item['title'] = get_text(head, 'title') - item['subject'] = [k.get('key') for k in \ + item['subjects'] = [k.get('key') for k in \ list(head.find('docdata/key-list')) if k.get('key')] #item['language'] sdate = get_text(head, 'docdata/date.release', 'norm')