Skip to content

Commit

Permalink
Ensure PLS files are valid
Browse files Browse the repository at this point in the history
  • Loading branch information
jaysonvirissimo committed Apr 28, 2024
1 parent 24fb19c commit 5d3b88f
Show file tree
Hide file tree
Showing 7 changed files with 26 additions and 1 deletion.
1 change: 1 addition & 0 deletions lexicons/Latin00.pls
Original file line number Diff line number Diff line change
Expand Up @@ -1796,3 +1796,4 @@
<grapheme>pedis</grapheme>
<phoneme>pɛdɪs</phoneme>
</lexeme>
<lexicon/>
1 change: 1 addition & 0 deletions lexicons/Latin01.pls
Original file line number Diff line number Diff line change
Expand Up @@ -1891,3 +1891,4 @@
<grapheme>parcere</grapheme>
<phoneme>parkɛre</phoneme>
</lexeme>
<lexicon/>
1 change: 1 addition & 0 deletions lexicons/Latin02.pls
Original file line number Diff line number Diff line change
Expand Up @@ -1851,3 +1851,4 @@
<grapheme>desitum</grapheme>
<phoneme>desitumː</phoneme>
</lexeme>
<lexicon/>
1 change: 1 addition & 0 deletions lexicons/Latin03.pls
Original file line number Diff line number Diff line change
Expand Up @@ -1888,3 +1888,4 @@
<grapheme>insulto</grapheme>
<phoneme>inˈsul.to</phoneme>
</lexeme>
<lexicon/>
1 change: 1 addition & 0 deletions lexicons/Latin04.pls
Original file line number Diff line number Diff line change
Expand Up @@ -192,3 +192,4 @@
<grapheme>vulnero</grapheme>
<phoneme>ˈvul.ne.ro</phoneme>
</lexeme>
<lexicon/>
4 changes: 3 additions & 1 deletion lib/medieval_latina/lexicon_builder.rb
Original file line number Diff line number Diff line change
Expand Up @@ -25,17 +25,19 @@ def call
xml.lexicon(xmlns: URL, version: "1.0") do
grouped_hash.each do |phonetics, words|
if xml.target!.length > MAX_SIZE
xml.lexicon # Close the current lexicon tag
write_file(xml.target!, file_index)
file_index += 1
xml = Builder::XmlMarkup.new(indent: 2) # Reset XML builder
xml.instruct! :xml, encoding: "UTF-8"
xml.lexicon(xmlns: URL, version: "1.0")
xml.lexicon(xmlns: URL, version: "1.0") # Start a new lexicon tag
end
xml.lexeme do
words.each { |word| xml.grapheme word }
xml.phoneme phonetics
end
end
xml.lexicon # Close the final lexicon tag
end
write_file(xml.target!, file_index) # Write the last file
end
Expand Down
18 changes: 18 additions & 0 deletions spec/medieval_latina_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -191,5 +191,23 @@
expect { Nokogiri::XML(content) }.not_to raise_error
end
end

# Each lexicon file has to open with the UTF-8 XML declaration followed by a newline.
it "includes the XML declaration" do
  declaration = /\A<\?xml version="1\.0" encoding="UTF-8"\?>\n/

  lexicon_files.each do |path|
    expect(File.read(path)).to match(declaration)
  end
end

# Checks that the text of every <phoneme> element is non-empty and made up
# only of characters from the Unicode letter, mark, number, punctuation,
# symbol and separator classes listed in the pattern below.
it "contains valid IPA characters in the <phoneme> elements" do
  valid_ipa_regex = /\A[\p{L}\p{M}\p{N}\p{P}\p{S}\p{Z}]+\z/
  # Select by local-name() so the lookup is namespace-agnostic. An unprefixed
  # "//phoneme" only matches elements in no namespace, so on a document whose
  # root declares a default xmlns it returns nothing and the expectations
  # below would never run.
  phoneme_xpath = "//*[local-name()='phoneme']"

  lexicon_files.each do |file|
    doc = Nokogiri::XML(File.read(file))
    doc.xpath(phoneme_xpath).each do |phoneme|
      expect(phoneme.text).to match(valid_ipa_regex)
    end
  end
end
end
end

0 comments on commit 5d3b88f

Please sign in to comment.