Skip to content

Commit

Permalink
Merge pull request #1 from mpbraendle/mpbraendle-patch-1
Browse files Browse the repository at this point in the history
Update search_xapian.pl
  • Loading branch information
mpbraendle committed Jan 15, 2016
2 parents 5017e4c + ee18c3f commit 6efe957
Showing 1 changed file with 18 additions and 2 deletions.
20 changes: 18 additions & 2 deletions lib/cfg.d/search_xapian.pl
Original file line number Diff line number Diff line change
Expand Up @@ -99,15 +99,31 @@
next if !EPrints::Utils::is_set( $value );
$tg->index_text( $value );
$tg->increase_termpos();
next if length($value) > 200; # Xapian term length limit-ish

# Allow indexing of long text fields (e.g. abstracts longer than 200 characters).
# The Xapian length limit applies only to a single term, i.e. a run of about 200 consecutive characters without whitespace.
# next if length($value) > 200; # Xapian term length limit-ish
if( $field->isa( "EPrints::MetaField::Text" ) || $field->isa( "EPrints::MetaField::Name" ) )
{
$tg->index_text( $value, 2, $prefix );
$tg->increase_termpos();
}
else
{
$doc->add_term( $prefix . $value );
# Non-text fields are added as one exact term, so over-long values must be
# filtered out here: Xapian limits the length of a single term.
next if length($value) > 200; # Xapian term length limit-ish

if ($field->name eq 'date')
{
	# Dates are reduced to their leading four-digit year and added as a
	# boolean term (wdf = 0); the original change note says this is meant
	# to let records sort correctly by date.
	# The captured year has to be used explicitly: a bare pattern match
	# does not modify $value, so the full date would otherwise be indexed.
	# Values without a leading year are indexed unchanged.
	my $year = ( $value =~ /^(\d{4})/ ) ? $1 : $value;
	$doc->add_boolean_term( $prefix . $year );
}
else
{
	$doc->add_term( $prefix . $value );
}
}
}
foreach my $langid (@{$repo->config( "languages" )})
Expand Down

0 comments on commit 6efe957

Please sign in to comment.