Skip to content

Commit

Permalink
Re-extract lexicon builder
Browse files Browse the repository at this point in the history
  • Loading branch information
jaysonvirissimo committed Apr 28, 2024
1 parent 8fe27c1 commit 5a63486
Show file tree
Hide file tree
Showing 5 changed files with 81 additions and 30 deletions.
17 changes: 10 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,18 +38,21 @@ responsiveVoice.speak(sentence, "UK English Female");
### Generate lexicons to override text-to-speech pronunciation
```ruby
require 'aws-sdk-polly'
polly = Aws::Polly::Client.new

polly = Aws::Polly::Client.new(region: 'us-west-2')
sentence = "PATER NOSTER qui es in caelis"

# Add the lexicons
MedievalLatina::Lexicon.file_names_with_contents.each do |name, content|
polly.put_lexicon(name: name, content: content)
end
words = sentence.split(" ")
pronunciations = MedievalLatina.pronunciations_for(words)
lexicon = MedievalLatina::LexiconBuilder.new(pronunciations).call

name = "CustomLatin"
polly.put_lexicon(name: name, content: lexicon.to_s)

# Synthesize speech using the lexicons
polly.synthesize_speech(
lexicon_names: MedievalLatina::Lexicon.list_files,
text: "PATER NOSTER, qui es in caelis",
lexicon_names: [name],
text: sentence,
output_format: "mp3",
voice_id: "Joanna"
)
Expand Down
28 changes: 5 additions & 23 deletions bin/build
Original file line number Diff line number Diff line change
@@ -1,33 +1,15 @@
#!/usr/bin/env ruby

require 'json'
require 'rexml/document'
require 'cgi'
include REXML
require "bundler/setup"
require "medieval_latina"
require "medieval_latina/lexicon_builder"

# Writes a pronunciation lexicon document for +words+ to +file_path+.
#
# The XML tree is produced by MedievalLatina::LexiconBuilder; this method
# only serializes it. Exactly one document is written per file.
#
# @param file_path [String] destination path, opened with mode 'w'
# @param words [#each] (word, pronunciation) pairs, handed to the builder as-is
def create_pls_file(file_path, words)
  document = MedievalLatina::LexiconBuilder.new(words).call

  File.open(file_path, 'w') do |file|
    # Second argument is the indentation passed to REXML's writer.
    document.write(file, 2)
    file.write("\n")
  end
end
Expand Down
11 changes: 11 additions & 0 deletions lib/medieval_latina.rb
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
require "json"
require "medieval_latina/initializer"
require "medieval_latina/lexicon"
require "medieval_latina/lexicon_builder"
require "medieval_latina/version"
require "set"

Expand Down Expand Up @@ -80,6 +81,16 @@ def self.nouns
end
end

# Looks up IPA pronunciations for the given words.
#
# Each word is downcased before the DICTIONARY lookup. Words without a
# DICTIONARY entry, or whose entry has no "ipa" value, are omitted from the
# result rather than raising.
#
# @param words [Enumerable<String>] words to look up, in any letter case
# @return [Hash] downcased word => value stored under the entry's "ipa" key
def self.pronunciations_for(words)
  words.each_with_object({}) do |word, pronunciations|
    key = word.downcase
    metadata = DICTIONARY[key]
    # metadata is nil for words the dictionary does not know about.
    ipa = metadata && metadata["ipa"]

    pronunciations[key] = ipa if ipa
  end
end

def self.rejoin_words(array)
array
.join(" ")
Expand Down
44 changes: 44 additions & 0 deletions lib/medieval_latina/lexicon_builder.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
require "cgi"
require "rexml/document"

class MedievalLatina
  # Builds a REXML document whose root is a <lexicon> element (in the W3C
  # pronunciation-lexicon namespace) containing one <lexeme> per word, each
  # with a <grapheme> and a <phoneme> child.
  class LexiconBuilder
    include REXML

    # Attributes placed on the root <lexicon> element.
    SPECIFICATION = {
      "version" => "1.0",
      "xmlns" => "http://www.w3.org/2005/01/pronunciation-lexicon",
      "alphabet" => "ipa",
      "xml:lang" => "en-US"
    }.freeze

    # @param words [#each] (word, pronunciation) pairs, e.g. a Hash
    def initialize(words)
      @words = words
    end

    # Builds the lexicon document.
    #
    # A fresh document is created on every invocation, so calling this more
    # than once on the same builder is safe and yields equivalent documents.
    #
    # @return [REXML::Document]
    def call
      Document.new.tap do |document|
        lexicon = document.add_element("lexicon", SPECIFICATION)

        words.each do |word, pronunciation|
          lexicon.add_element(lexeme_for(word, pronunciation))
        end
      end
    end

    private

    attr_reader :words

    # Builds a single <lexeme> element for one word/pronunciation pair.
    def lexeme_for(word, pronunciation)
      Element.new("lexeme").tap do |lexeme|
        # HTML entities in the word are decoded before being stored.
        lexeme.add_element("grapheme").text = CGI.unescapeHTML(word)
        lexeme.add_element("phoneme").text = pronunciation
      end
    end
  end
end
11 changes: 11 additions & 0 deletions spec/medieval_latina_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,17 @@
specify { expect(described_class).to respond_to(:adverbs) }
end

# Checks that the lookup returns entries for exactly the requested words.
describe ".pronunciations_for" do
  subject(:pronunciations) do
    described_class.pronunciations_for(["beatus", "vir"])
  end

  it "returns only the words and pronunciations asked for" do
    expected = {"beatus" => "beatʊsː", "vir" => "vir"}

    expect(pronunciations).to match(expected)
  end
end

describe ".verbs" do
specify { expect(described_class).to respond_to(:verbs) }
end
Expand Down

0 comments on commit 5a63486

Please sign in to comment.