Skip to content

Commit

Permalink
Initial pass at #35; add stubs for #36 and #37
Browse files Browse the repository at this point in the history
tla committed Apr 17, 2015
1 parent f7ef45a commit e221133
Showing 4 changed files with 220 additions and 0 deletions.
98 changes: 98 additions & 0 deletions base/lib/Text/Tradition.pm
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package Text::Tradition;

use Encode qw/ decode_utf8 /;
use JSON qw / from_json /;
use Module::Load;
use Moose;
@@ -337,6 +338,103 @@ sub BUILD {
return $self;
}

=head2 as_graphml
Export the entire tradition as XML, with the collation expressed in GraphML.
=cut

sub as_graphml {
my $self = shift;
return $self->_get_document( 'as_graphml', @_ );
}

=head2 as_tei_ps
Export the tradition in TEI P5 format, using the parallel-segmentation
method of critical apparatus.
=begin testing
use Text::Tradition;
use XML::LibXML;
my $s = Text::Tradition->new(
name => 'inline',
input => 'Tabular',
file => 't/data/simple.txt',
);
my $docstr = $s->as_tei_ps();
my $doc = XML::LibXML->load_xml( string => $docstr );
my $tei = $doc->documentElement;
is( $tei->nodeName, 'TEI', "Got a TEI document back out" );
# TODO test existence of witnesses, and that there are 4 apps in the output
is( $tei->getElementsByTagName('witness')->size, 3, "Found three witnesses" );
is( $tei->getElementsByTagName('app')->size, 4, "Found four apparatus entries" );
=end testing
=cut

sub as_tei_ps {
my $self = shift;
return $self->_get_document( 'as_tei_ps', @_ );
}

=head2 as_tei_dea( $base )
Export the tradition in TEI P5 format, using the double-endpoint-attachment
method of critical apparatus. A single witness may be specified as the base;
the default is to use the majority text as a base.
=cut

sub as_tei_dea {
my $self = shift;
return $self->_get_document( 'as_tei_dea', @_ );
}

## TODO Implement the other export methods here.

sub _get_document {
my( $self, $format, @args ) = @_;
my $doc = $self->_make_tei_frame;
my( $textel ) = $doc->documentElement->getElementsByTagName('text');
$DB::single = 1;
my $content = $self->collation->$format( @args );
$doc->documentElement->appendChild( $content );
return decode_utf8( $doc->toString(1) );
}

sub _make_tei_frame {
my $self = shift;
my $tei_ns = 'http://www.tei-c.org/ns/1.0';
my $doc = XML::LibXML->createDocument( "1.0", "UTF-8" );
my $root = $doc->createElementNS( $tei_ns, 'TEI' );
$doc->setDocumentElement( $root );
my $teiheader = $root->addNewChild( $tei_ns, 'teiHeader' );
my $filedesc = $teiheader->addNewChild( $tei_ns, 'fileDesc' );
$filedesc->addNewChild( $tei_ns, 'titleStmt' )->
addNewChild( $tei_ns, 'title' )->
appendText( $self->name );
$filedesc->addNewChild( $tei_ns, 'publicationStmt' )->
addNewChild( $tei_ns, 'p' )->
appendText( 'Created by the Text::Tradition library' );
my $witnesslist = $filedesc->addNewChild( $tei_ns, 'sourceDesc')->
addNewChild( $tei_ns, 'listWit' );
foreach my $wit ( $self->witnesses ) {
my $wit_el = $witnesslist->addNewChild( $tei_ns, 'witness' );
$wit_el->setAttribute( 'xml:id', $wit->sigil );
if( $wit->has_identifier ) {
$wit_el->appendText( $wit->identifier );
} else {
$wit_el->appendText( 'Unidentified manuscript' );
}
}
return $doc;
}

=head2 clear_collation
Blow away the existing collation object and mark all witnesses as uncollated.
99 changes: 99 additions & 0 deletions base/lib/Text/Tradition/Collation.pm
Original file line number Diff line number Diff line change
@@ -7,6 +7,7 @@ use File::Which;
use Graph;
use IPC::Run qw( run binary );
use JSON qw/ to_json /;
use Set::Scalar;
use Text::CSV;
use Text::Tradition::Collation::Data;
use Text::Tradition::Collation::Reading;
@@ -1684,6 +1685,104 @@ sub _add_graphml_data {
$data_el->appendText( $value );
}


=head2 as_tei_ps
Returns an XML::LibXML TEI 'text' element that contains the collation in
parallel segmentation format.
=cut

sub as_tei_ps {
my( $self, $opts ) = @_;

# Some namespaces
my $tei_ns = 'http://www.tei-c.org/ns/1.0';

# Create the document and root node
require XML::LibXML;
# Put the whole text into an anonymous block
my $text_el = XML::LibXML::Element->new('text');
$text_el->setNamespace( $tei_ns );
my $container = $text_el->addNewChild( $tei_ns, 'body' )
->addNewChild( $tei_ns, 'ab' );
# Now step through the text, rank by rank, making apparatus elements
# where necessary. We assume that the sigla will be the XML IDs of the
# witnesses in the final document.
my $table = $self->alignment_table( $opts );
my @all_witnesses = map { $_->{witness} } @{$table->{alignment}};
my $join_next;
foreach my $idx ( 0 .. $table->{length} - 1 ) {
my @rowobjs = map { $_->{tokens}->[$idx] } @{$table->{alignment}};
# Get the set of readings here.
my $rdgs = Set::Scalar->new();
$rdgs->insert( map { $_ ? $_->{t} : '' } @rowobjs );
if( $rdgs->size == 1 ) {
# It doesn't need an app. Join the reading to the current text.
my $rdg = $rdgs->[0];
$join_next = $self->_append_reading_text( $container, $rdg, $join_next );
} else {
# We need to make an apparatus.
my $app = $container->addNewChild( $tei_ns, 'app' );
my %wits_present;
map { $wits_present{$_} = 0 } @all_witnesses;
foreach my $rdg ( @$rdgs ) {
next unless $rdg;
map { $wits_present{$_} = 1 } $rdg->witnesses;
my $rdg_el = $app->addNewChild( $tei_ns, 'rdg' );
$rdg_el->setAttribute( 'wit', $self->_make_xml_witstring( $rdg ) );
$join_next = $self->_append_reading_text( $rdg_el, $rdg, $join_next );
my $ac_wits = $self->_make_xml_witstring( $rdg, 1 );
if( $ac_wits ) {
my $rdg_ac_el = $app->addNewChild( $tei_ns, 'rdg' );
$rdg_ac_el->setAttribute( 'wit', $ac_wits );
$rdg_ac_el->setAttribute( 'type', 'a.c.' );
# This implies that all parallel readings will have the same
# join_next setting. Might want to think harder about that.
$join_next = $self->_append_reading_text( $rdg_el, $rdg, $join_next );
}
}
# Now check for an empty reading.
my @missing = grep { $wits_present{$_} == 0 } @all_witnesses;
if( @missing ) {
warn( "Missing witnesses @missing at rank $idx when there is no empty reading!" )
unless $rdgs->has('');
my $rdg_el = $app->addNewChild( $tei_ns, 'rdg' );
$rdg_el->setAttribute( 'wit', join( ' ', map { "#$_" } @missing ) );
}
}
}
# Great. Return the text element.
return $text_el;
}

sub _append_reading_text {
my( $self, $el, $rdg, $join_next ) = @_;
$el->appendText( $self->wordsep )
unless $join_next || $rdg->join_prior;
$el->appendText( $rdg->text );
return $rdg->join_next;
}

sub _make_xml_witstring {
my( $self, $rdg, $aclayer ) = @_;
my @witlist;
my $aclabel = $self->ac_label;
if( $aclayer ) {
foreach my $wit ( $rdg->witnesses ) {
if( $wit =~ /^(.*)\Q$aclabel\E$/ ) {
push( @witlist, $1 );
}
}
} else {
@witlist = grep { $_ !~ /^(.*)\Q$aclabel\E$/ } $rdg->witnesses;
}
return join( ' ', map { "#$_" } @witlist );
}


=head2 as_tei_dea( $options )
=head2 as_csv
Returns a CSV alignment table representation of the collation graph, one
1 change: 1 addition & 0 deletions base/lib/Text/Tradition/Witness.pm
Original file line number Diff line number Diff line change
@@ -231,6 +231,7 @@ has 'ascii_sigil' => (
has 'identifier' => (
is => 'rw',
isa => 'Str',
predicate => 'has_identifier',
);

has 'settlement' => (
22 changes: 22 additions & 0 deletions base/t/text_tradition.t
Original file line number Diff line number Diff line change
@@ -71,5 +71,27 @@ try {



# =begin testing
{
use Text::Tradition;
use XML::LibXML;

my $s = Text::Tradition->new(
name => 'inline',
input => 'Tabular',
file => 't/data/simple.txt',
);

my $docstr = $s->as_tei_ps();
my $doc = XML::LibXML->load_xml( string => $docstr );
my $tei = $doc->documentElement;
is( $tei->nodeName, 'TEI', "Got a TEI document back out" );
# TODO test existence of witnesses, and that there are 4 apps in the output
is( $tei->getElementsByTagName('witness')->size, 3, "Found three witnesses" );
is( $tei->getElementsByTagName('app')->size, 4, "Found four apparatus entries" );
}




1;

0 comments on commit e221133

Please sign in to comment.