From f62d9f203fc6ffde9b34323022bcc0311b13c2ce Mon Sep 17 00:00:00 2001 From: Finn Brewer Date: Sat, 17 Aug 2024 18:22:38 -0700 Subject: [PATCH] fix: ending word getting removed in default parser if it doesn't end with punctuation --- src-tauri/src/language_parsing.rs | 32 +++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/src-tauri/src/language_parsing.rs b/src-tauri/src/language_parsing.rs index 1a3609c..7f5ab27 100644 --- a/src-tauri/src/language_parsing.rs +++ b/src-tauri/src/language_parsing.rs @@ -595,6 +595,9 @@ fn default_tokenizer( ) -> Result<(Vec<String>, Vec<Word>), KalbaError> { let mut words = Vec::new(); let mut sentences = Vec::new(); + if sent.is_empty() { + return Ok((sentences, words)); + } let mut current_sentence = String::new(); let mut currently_building = String::new(); @@ -665,6 +668,35 @@ fn default_tokenizer( } } } + + if !currently_building.is_empty() { + let word = std::mem::take(&mut currently_building); + let rating = state + .to_save + .language_specific + .get_mut(&language) + .expect("language to be chosen") + .words + .entry(word.clone()) + .or_insert(crate::WordInfo { + rating: 0, + method: crate::Method::FromSeen, + history: vec![(chrono::Utc::now(), crate::Method::FromSeen, 0)], + }) + .rating; + + words.push(Word { + text: word.clone(), + clickable: true, + lemma: word.clone(), + rating, + morph: HashMap::new(), + other_forms: get_alternate_forms(&word, interpreter, state)?, + length: word.chars().count(), + whitespace_after: true, + sentence_index: sentences.len(), + }) + } if !current_sentence.is_empty() { sentences.push(current_sentence); }