From b5ca4b62fd2bbae3a0d720f7bde4c2d221391e17 Mon Sep 17 00:00:00 2001
From: Dmitry Shpika
Date: Sun, 25 Jun 2017 18:29:23 +0300
Subject: [PATCH] Fix #3: Add string deduplication function to circumvent the bug in Zorba

---
 src/tags.xq | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/src/tags.xq b/src/tags.xq
index e3d9f8a..67f9371 100644
--- a/src/tags.xq
+++ b/src/tags.xq
@@ -3,7 +3,18 @@ module namespace tags = "tags";
 
 (: This file is generated, do not edit manually! :)
 
+declare function tags:deduplicate($text as xs:string) as xs:string? { (: Returns one half of $text when $text is the same string written twice back to back; otherwise returns $text unchanged. :)
+  let $len := string-length($text) div 2 (: half the length; div gives a fractional value when the length is odd :)
+  let $fst := substring($text, 1, $len) (: leading part of the string :)
+  let $snd := substring($text, $len+1) (: remainder of the string, starting right after the leading part :)
+  return if ($fst = $snd) then $fst else $text (: equal parts mean the text was doubled, so keep a single copy :)
+};
+
 declare function tags:convert-entity($word-id as xs:string, $text as xs:string) as xs:string? {
+  tags:convert-entity-normalized($word-id, tags:deduplicate(normalize-space($text))) (: collapse whitespace, undo a doubled string, then do the lookup; NOTE(review): the commit subject says this circumvents a Zorba bug, presumably one that doubles the text - confirm against issue #3 :)
+};
+
+declare function tags:convert-entity-normalized($word-id as xs:string, $text as xs:string) as xs:string? { (: Takes over the switch lookup that used to be the body of tags:convert-entity; the visible caller passes whitespace-normalized, de-duplicated text. :)
   switch($text)
     case "martial arts term" return "MA"
     case "rude or X-rated term (not displayed in educational software)" return "X"