Skip to content

Commit

Permalink
fixed get_date_path and media logic
Browse files Browse the repository at this point in the history
  • Loading branch information
Juan A. Diaz committed Apr 13, 2012
1 parent 4a403a4 commit fbae0bc
Showing 1 changed file with 15 additions and 16 deletions.
31 changes: 15 additions & 16 deletions src/transmogrify/nitf/xmlsource.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,13 @@ def get_text(dom, subelemet, attribute=None):


def get_date_path(dom, subelemet, attribute):
""" Return a path ibased on a date value normalized into ISO8601
""" Return a path 'YYYY/MM/DD' based on a date value normalized into
ISO-8601
Note: Only works with the basic format.
"""
text = get_text(dom, subelemet, attribute)
text = get_text(dom, subelemet, attribute)
# We only need the YYYYMMDD part from the string
date =
return "/".join([text[:4], text[4:6], text[6:8]])


class XMLSource(object):
Expand All @@ -45,8 +46,6 @@ def __init__(self, transmogrifier, name, options, previous):

def __iter__(self):
for data in self.previous:
images = []
videos = []
item = {'id': '',
'path': '',
'title': '',
Expand All @@ -57,7 +56,10 @@ def __iter__(self):
'genre': '',
'section': '',
'urgency': '',
'location': ''}
'location': '',
'media': {'image': [],
'video': []}
}

dom = etree.fromstring(data)
head = dom.find('head')
Expand All @@ -70,25 +72,22 @@ def __iter__(self):
'tobject.property.type')
item['section'] = get_text(head, 'pubdata', 'position.section')
item['urgency'] = get_text(head, 'docdata/urgency', 'ed-urg')
item['location'] = ", ".join([
get_text(head, 'docdata/evloc', 'city'),
get_text(head, 'docdata/evloc', 'state-prov'),
get_text(head, 'docdata/evloc', 'iso-cc')])

item['location'] = get_text(body, 'body.head/dateline/location')
item['subtitle'] = get_text(body, 'body.head/hedline/hl2')
item['description'] = get_text(body, 'body.head/abstract')
item['byline'] = get_text(body, 'body.head/byline/person')

for elem in list(body.find('body.content')):
if elem.tag == 'media':
if elem.tag == 'media' and elem.get('media-type') == 'image':
image = dict(elem.find('media-reference'))
image['alt'] = get_text(elem, 'media-caption')
images.append(image)
image['media-caption'] = get_text(elem, 'media-caption')
item['media']['image'].append(image)

elif elem.tag == 'video':
elif elem.tag == 'media' and elem.get('media-type') == 'video':
video = dict(elem.find('media-reference'))
video['alt'] = get_text(elem, 'media-caption')
videos.append(video)
video['media-caption'] = get_text(elem, 'media-caption')
item['media']['video'].append(video)

else: # other tags are considered part of the body text and
# should be preserved.
Expand Down

0 comments on commit fbae0bc

Please sign in to comment.