diff --git a/README.md b/README.md index 5a2c3b3..744ba1a 100644 --- a/README.md +++ b/README.md @@ -33,7 +33,7 @@ ## Features -* Supports [69 languages](https://github.com/greyblake/whatlang-rs/blob/master/SUPPORTED_LANGUAGES.md) +* Supports [70 languages](https://github.com/greyblake/whatlang-rs/blob/master/SUPPORTED_LANGUAGES.md) * 100% written in Rust * Lightweight, fast and simple * Recognizes not only a language, but also a script (Latin, Cyrillic, etc) @@ -108,7 +108,7 @@ For more details, please check a blog article [Introduction to Rust Whatlang Lib | | Whatlang | CLD2 | CLD3 | | ------------------------- | ---------- | ----------- | -------------- | | Implementation language | Rust | C++ | C++ | -| Languages | 68 | 83 | 107 | +| Languages | 70 | 83 | 107 | | Algorithm | trigrams | quadgrams | neural network | | Supported Encoding | UTF-8 | UTF-8 | ? | | HTML support | no | yes | ? | diff --git a/SUPPORTED_LANGUAGES.md b/SUPPORTED_LANGUAGES.md index 0d5b019..0b91404 100644 --- a/SUPPORTED_LANGUAGES.md +++ b/SUPPORTED_LANGUAGES.md @@ -77,3 +77,4 @@ and [documentation](https://docs.rs/whatlang/). 
| Catalan | cat | `Lang::Cat` | | Tagalog | tgl | `Lang::Tgl` | | Armenian | hye | `Lang::Hye` | +| Welsh | cym | `Lang::Cym` | diff --git a/misc/alphabets/latin.yml b/misc/alphabets/latin.yml index f142dad..0670848 100644 --- a/misc/alphabets/latin.yml +++ b/misc/alphabets/latin.yml @@ -4,6 +4,7 @@ aka: abdefghiklmnoprstuwyɔɛ aze: abcdefghijklmnopqrstuvxyzçöüğışə̇ cat: abcdefghijklmnopqrstuvwxyz·àçèéíïòóúü ces: abcdefghijklmnopqrstuvwxyzáéíóúýčďěňřšťůž +cym: abcdefghijklmnopqrstuvwxyzàáâäèéêëìíîïòóôöùúûüýÿŵŷẁẃẅỳ dan: abcdefghijklmnopqrstuvwxyzåæø deu: abcdefghijklmnopqrstuvwxyzßäöü eng: abcdefghijklmnopqrstuvwxyz diff --git a/misc/alphabets/raw_latin.yml b/misc/alphabets/raw_latin.yml index 214201f..c2445ba 100644 --- a/misc/alphabets/raw_latin.yml +++ b/misc/alphabets/raw_latin.yml @@ -35,6 +35,7 @@ latin_based: est: "ÄÖÕÜäöõü" lat: "" tgl: "áéíñóú" + cym: "ÂÊÎÔÛŴŶÁÉÍÏâêîôûŵŷáéíïÓÚẂÝÀÈÌÒÙẀỲÄËÖÜẄŸóúẃýàèìòùẁỳäëöüẅÿ" others: tuk: "ABÇDEÄFGHIJŽKLMNŇOÖPRSŞTUÜWYÝZ" epo: "ABCĈDEFGĜHĤIJĴKLMNOPRSŜTUŬVZ" diff --git a/misc/data.json b/misc/data.json index e8e5c5b..79dc3aa 100644 --- a/misc/data.json +++ b/misc/data.json @@ -120,7 +120,8 @@ "ndo": "na |oku|wa | na|a o|a n|ka |ntu| uu|tu |uth| om|e o|mba|ong|omu|ba | ok|uut| ne|he |the|ang|hem|emb|unt|o o|a u| wo|nge| iy|ehe|kal| no|a w|o n|no |nga|e n|ko |mun|oka|lo |o i|lon|we |ulu|a m|ala| ke|la |a k|u n|han|ku |gwa|osh|shi|ana|ngu|ilo|ano|ngo|keh| mo|ga |nen|man|ho |luk|tha|ge |gul|u k|eng|ha |a y|elo|uko|a e|ye |hil|uka|li |go |wan|ath|wo |thi|dhi|uun| pa|kwa| ta|a p|ya | sh| ko|nka|lwa| os|mwe|oma|ta |ema|sho| ka|e m| yo|sha|wok|ika|po |o w|onk|e p|pan|ith|a i|opa|gel|hik|iya|hi |aan|una|o g|kuk|alo|o e|nok|ndj|le |a a|men|yom|a s|i n| li|and| po|pam|lat|kan|ash|waa|aka|ame|gam|umb|a t|ond|yuu|o k|olo|ane|ing|igw|aa |ele|kul|mon| gw|ilw|gan|o y|iil|iyo| el|kut|nin|oko|ike|o m| ku|adh| ye|amw|ome|yeh|aye| ga| on| yi|a g|lyo|ne | ng|mbo|opo|kug|eko|yok|wom| oy|non|iye| go|ulo|e e| we| e |ina|ant|omo|ene| a |i k|mok|him| 
dh|und|ndu| me|eho|wen|nek| op|alu|e g|ima|kat|ota|oye|ila|ngw|yop|wat|ela|o u|a l| ii| ay| nd| th|o l|yon|ili|oon|okw|yaa|taa|lwe|omb| ni|aku|i m|mo |ula|ekw|enw|iyu|pok|epa|uki|ke | wu| mb|meh|e t|uni|nom|dho|pau|eta|yi | ly|o a|ono|lun|lak|ola|yo |lol|ank|bo |i o|awa|nwa|a h|naw|hok|nem|kom|ndo|o s|u t|vet|mbu|ani|uga|ndi|ukw|udh|lok|e k|alw|kwe|kun| ya", "quy": "chi|nch|hik|una| ka|anc|kun|man|ana|aq |cha|aku|pas|as |sqa|paq|nan|qa |apa|kan|ikp|ik |ech|spa| de|pa |cho|ere|der|rec|am | ru|an | ma| ch|kpa|asq|ta |na |nam|nak|taq|a k|qan|ina|run|lli|ach|nap|pi |mi | ll|yoq|asp|ima|hay|hin|aqa|nku|ant|ayn|oyo| hi| im|hoy|cio|nta|nas|q k|api|iw |wan|kuy|kay|liw|aci|ion|ipa|lla|oq |npa|ay |kas|a m|nac| na|inc|all|ama|ari|anp| ya|chu| hu|nin|pip|i k|qmi|hon|w r|ata|awa|a c|ota|in |yku|yna| wa|a h|has|a d|iku|a l| li|pan|ich|may| pi| ha|onc|a r|onk| ot|ku | qa|ank|aqm|mun|anm|hu |a p|nma| mu|qta|n h|pap|isq|yni|ikm|ma |wsa|aws|kaw|ibr|bre|lib|ayk|usp|nqa|e k| al|lin|n k|re |ara|nat|yac|kma|war|huk|uwa|yta|hwa|chw| sa|was|kus|yan|m d|kpi|q m|a i|q l|kin|tap|a a|kta|ikt|i c|a s|uy | ca|qaw|uku| tu| re|aqt|ask|qsi|sak|uch|q h|cas|tin|pak|ris|ski|sic|q d|nmi|s l|naq|tuk|mpa|a y|k c|uma|ien|ypi| am|qaq|qap|eqs|ayp|req|qpa|aqp|law|ayt|q c|pun| ni|a q|ruw|i h|haw|n c| pa|amp|par|k h| le|yma|ñun|ern|huñ|nni|n r|anq|map|aya|tar|s m|uñu|ten|val|ura|ita|arm|isu|s c|onn|igu| ri|qku|naw|k l|u l|his|ley|say|s y|rim|aru|rma|sun|ier|s o|qar|n p|a f|a t|esq|n a|oqm|s i|awk| va|w n|hap|lap|kup|i r|kam|uyk|sap| qe|ual|m p|ran|nya|gua| pe| go|gob|maq|sum|ast| su| ig", "rmn": "aj |en | te|te | sa| le|aka|pen| si| e |el |ipe|si |kaj|sar| th|and| o |sav|qe |les| ma|es | ha|j t|hak|ja |ar |ave| an|a s|ta |i l|ia |nas| aj|ne | so|imn|mna|sqe|esq|nd |tha|haj|e s|e t|e a|enq|asq|man| ja|kan|e m| i | ta|the|mes|cia|bar|as |isa|utn|qo |hem|o s|s s| me|vel|ark|i t| na|kas|est| ba|s h|avo| di|ard| bi| pe|rka|lo | ak|ika|e r|a a| pr|e k|qi |mat|ima|e p|a t| av|e d|r s|n s|anu|nuś|o t|avi|orr|o a| ka| 
re|n a|re |aja|e o|sqo|sti| ov|õl |l p|nqe|ere|d o|vor|so |no |dik|rel|ove|n t|ve |e b|res|tim|ren| de|àci|o m|i a|but|len|ali|ari|rre|de | pa|ver| va|sqi|ara|ana|vip|rak|ang|vi | ra|or |ker|i s|eme|e z|ata|e l|a e|rip|rim|akh|la |o p|kar|e h|a p|na |ane|rin|ste|j b|er |ind|ni |tne| ph|nip|r t| ke|ti |are|ndo| je|l a|uśi|e n|khi| bu|kon|lim|al |tar|ekh|jek|àlo|o k| ko|rde|rab|aba| zi|ri |aća|ćar|śik|dõl|dor|on |ano|ven| ni|śaj| śa|khe|ća |ast|j s|uti|uni|tni|naś|i d|mut| po|i p|a m| pu|a l|l s|som|n n|ikh|nik|del|ala|ris|pes|pe |j m|enć|e e|nća|ndi|rdõ|kri|erd|śka|emu|men|alo|nis|aśt|śti|amu|kh |tis|uj |j p|do |ani|ate|nda|o b|nge|o z|soc|a d|muj|o j|da |pri|rdo| as|cie|l t|ro |i r|kla|ing|a j| ze|zen|j e|ziv|hin|aśk| st|maś|ran|pal|khl|mam|i b|oci|rea|l o|nqo| vi|n e", - "lat": "is |et |us |um | et|ae |tat|ati| co|que|ue |ion| qu|em |ent|oni|est| su| iu| in| po|tio|tes|tis|ate|bus|e i|ita|ibu|ium|ius|qui|nti|eri|es |s p|con|s e|per|end|pot|ote| ha|nis| pr|s i|abe|uis|am |uae|tem|hab|bet|m h|ndi| ho|sta| de|sua|isq|squ|ter|ici|min|iur|one| re|hom| di| om|omn|rum|s a|t c|rat|lib|ibe|m e| pe|gen| li|ert|ine|nte|nem|ri |ber|tia|e q|dis| ip|ips| ad|di |nes|e s|e c|m p|s c| ve|e p| pa|ili| ge|a e|i p|nt |omi|atu|tur|rit| si|ne |psi|in |ia |ra |ari| cu|vit|rta|mo |to |mni|s h|e e|int|siu|m c|qua|t p|ivi|ini|ut |re |ers|it |s s|iae| es|t s|and| ne|pro| nu|st | ex|nda|cie|nib|t a|ere|tri|nit| at|tiu|ta |ris| ci|civ|ni |uri|ur |rim| vi|par|ad |ess|lic|i i| so| pu| op|rae| fa|s v| ut|dem|se |ons|o e|ria| se|e a| mo|leg|atq|tqu|com|te |niu|ien|vel|el | ma|t e|iis|gni|equ|oci|cip|ura|unt|s d|t i|ali|quo|ect| te|a s|t d| do|tut|ant|isc|ina|men|sin|ua |pra|oru|omm|eta|s n|a p|tum|iam|io |i c|sti| au|ver| ae|ito|dic|imi|s l|e d|fic|cia|t o|pub|ubl|bli|mun|i s|soc|aru|lar|ull|ori|t h|i e|sse|omo|cto|itu|tus| ea|ea |aeq|gio|ui |m s|er |m r| ra| fi|ffi|cog|da | le|mod|a c|mqu|nul|e o|era|ten|ntu|spe|o n|emo|cri|s f| ca|de |a d|rel|ii |ene| tu|sui|rti|sci|nae|m q|m a|egi|ces" + 
"lat": "is |et |us |um | et|ae |tat|ati| co|que|ue |ion| qu|em |ent|oni|est| su| iu| in| po|tio|tes|tis|ate|bus|e i|ita|ibu|ium|ius|qui|nti|eri|es |s p|con|s e|per|end|pot|ote| ha|nis| pr|s i|abe|uis|am |uae|tem|hab|bet|m h|ndi| ho|sta| de|sua|isq|squ|ter|ici|min|iur|one| re|hom| di| om|omn|rum|s a|t c|rat|lib|ibe|m e| pe|gen| li|ert|ine|nte|nem|ri |ber|tia|e q|dis| ip|ips| ad|di |nes|e s|e c|m p|s c| ve|e p| pa|ili| ge|a e|i p|nt |omi|atu|tur|rit| si|ne |psi|in |ia |ra |ari| cu|vit|rta|mo |to |mni|s h|e e|int|siu|m c|qua|t p|ivi|ini|ut |re |ers|it |s s|iae| es|t s|and| ne|pro| nu|st | ex|nda|cie|nib|t a|ere|tri|nit| at|tiu|ta |ris| ci|civ|ni |uri|ur |rim| vi|par|ad |ess|lic|i i| so| pu| op|rae| fa|s v| ut|dem|se |ons|o e|ria| se|e a| mo|leg|atq|tqu|com|te |niu|ien|vel|el | ma|t e|iis|gni|equ|oci|cip|ura|unt|s d|t i|ali|quo|ect| te|a s|t d| do|tut|ant|isc|ina|men|sin|ua |pra|oru|omm|eta|s n|a p|tum|iam|io |i c|sti| au|ver| ae|ito|dic|imi|s l|e d|fic|cia|t o|pub|ubl|bli|mun|i s|soc|aru|lar|ull|ori|t h|i e|sse|omo|cto|itu|tus| ea|ea |aeq|gio|ui |m s|er |m r| ra| fi|ffi|cog|da | le|mod|a c|mqu|nul|e o|era|ten|ntu|spe|o n|emo|cri|s f| ca|de |a d|rel|ii |ene| tu|sui|rti|sci|nae|m q|m a|egi|ces", + "cym": "yn | yn|dd | ma|ae |mae|au | y |d y|edd| r |ydd| ar| i |n y| o | cy|th | gw|ddi|eth|oed|ol |ar | gy| dd|wyd| ei| n | a |yd |odd| ga|aet|an | rh|iad|io |n a|ei |yr |wn |n c| ll| ca|n g|di |wed| me|od |el |n d|edi|r y|ith| we|ad | fe|er |r a|dau| da| am|d a|on |ch |l y|ddo| he| ch|roe| hy|e r| di|ynn| yr|dda|r g|gan|ir |ewn| ro|en | dy|fod| ff|iau|ll |mew| ym| de|id | sy|yw |dia|hyn|fyd|i g| un|eu |i d|nol|lla|u a|eit| ac|dol|i r|wy |dio|cyn|fel| ni|o r|idd|rth| go|l a|ai |efy|dyn| bo|rha|ed | dr|rwy|ada|n f|wyr|fer|ac |n e|rdd|aid|ael|all|nt |ion| tr|nyd|ach|gyf|cyf|r d|ig |h y|chw|ell|n b|d e|n o| by| ne|da | be|han|nia| oe|d o|r c|d g|dde|r o|af |ara|ni |n s| pe|lwy|gwe|i a|wr | br|in |gol| ge|rch|hef| ad|nod|nna|gyd| fa|un |d h| ys|d i|y d|e n|ria|es | an|dwy|am 
|ysg|y g|wyn|u c|l e|i f|gwy|efn|ddy|y c|dig|wys| eu|yda|n h|ych|thi|ant| yw|wei| ba|d c|n n|s y|yst|ryd|na |o a|i n|n m|u g|d d|law|i w|n i|n r| fo|ys |iae| co|do |lia|red|nd |y n|hau| ha|neu|u y|rhy|u r|bod| pr| ce|rae|gor|enn|gwa| pa|i c| er|lyn|rai|rif|ian|lli|nau|r h|lan|nwy|yfe|tha|r e|d m|diw|os |lle|ang| se|ddw|al |lad|o g|cae|ann|oli|a r|r b|rio|hyd|ait|aen|u d|no |d b| si|fan|lly|u h|o d|i b|dar|sgo|yng|dod|u n" }, "Cyrillic": { "rus": " пр| и |рав|ств| на|пра|го |ени|ове|во | ка|ани|ть | в | по| об|ия |сво| св|лов|на | че|ело|о н| со|ост|чел|ие |ого|ет |ния|ест|аво|ый |ажд| им|ние|век| не|льн|ли |ова|име|ать|при|т п|и п|каж|или|обо| ра|ых |жды| до|дый|воб|ек |бод|ва |й ч|его|ся |и с|ии |аци|еет|но |мее|и и|лен|ой |тва|ных|то | ил|к и|енн| бы|ию | за|ми |тво|и н|о п|ван|о с|сто|аль| вс|ом |о в|ьно|их |ног|и в|нов|ако|про|ий |сти|и о|пол|олж|дол|ое |бра|я в| ос|ным|жен|раз|ти |нос|я и| во|тор|все| ег|ей |тел|не |и р|ред|ель|тве|оди| ко|общ|о и| де|има|а и|чес|ним|сно|как| ли|щес|вле|ься|нны|аст|тьс|нно|осу|е д| от|пре|шен|а с|бще|осн|одн|быт|сов|ыть|лжн|ран|нию|иче|ак |ым |ват|что|сту|чен|е в| ст|рес|оль| ни|ном|род|ля |нар|вен|ду |оже|ны |е и| то|вер|а о|зов|м и|нац|ден|рин|туп|ежд|стр| чт|я п|она|дос|х и|й и|тоя|есп|лич|бес|обр|ото|о б|ьны|ь в|нии|е м|ую | мо|ем | ме|аро| ре|ава|кот|ав | вы|ам |жно|ста|ая |под|и к|ное| к | та| го|гос|суд|еоб|я н|ен |и д|мож|еск|ели|авн|ве |ече|уще|печ|дно|о д|ход|ка | дл|для|ово|ате|льс|ю и|в к|нен|ции|ной|уда|вов| бе|оро|нст|ами|циа|кон|сем|е о|вно| эт|азо|х п|ни |жде|м п|ког|от |дст|вны|сть|ые |о о|пос|сре|тра|ейс|так|и б|дов|му |я к|нал|дру| др|кой|тер|ь п|арс|изн|соц|еди|олн", diff --git a/misc/lang.rs.erb b/misc/lang.rs.erb index ea960a6..d5efffe 100644 --- a/misc/lang.rs.erb +++ b/misc/lang.rs.erb @@ -10,6 +10,11 @@ use crate::error::ParseError; /// Represents a language following [ISO 639-3](https://en.wikipedia.org/wiki/ISO_639-3) standard. 
#[cfg_attr(feature = "enum-map", derive(::enum_map::Enum))] #[cfg_attr(feature = "arbitrary", derive(::arbitrary::Arbitrary))] +#[cfg_attr( + feature = "serde", + derive(::serde::Serialize, ::serde::Deserialize), + serde(rename_all = "lowercase") +)] #[derive(PartialEq, Eq, Debug, Hash, Clone, Copy)] pub enum Lang { <% langs.each_with_index do |lang, index| %> @@ -191,4 +196,14 @@ mod tests { assert_eq!(Lang::Deu.to_string(), "Deutsch"); assert_eq!(Lang::Eng.to_string(), "English"); } + + #[cfg(feature = "serde")] + #[test] + fn test_serialize_and_deserialize() { + let langs = vec![Lang::Epo, Lang::Ukr, Lang::Spa]; + let json_langs = serde_json::to_string(&langs).unwrap(); + assert_eq!(json_langs, r#"["epo","ukr","spa"]"#); + let parsed_langs: Vec<Lang> = serde_json::from_str(&json_langs).unwrap(); + assert_eq!(parsed_langs, langs); + } } diff --git a/misc/supported_languages.csv b/misc/supported_languages.csv index d3a748e..435f463 100644 --- a/misc/supported_languages.csv +++ b/misc/supported_languages.csv @@ -68,3 +68,4 @@ slk,Slovak,Slovenčina,5 cat,Catalan,Català,10 tgl,Tagalog,Tagalog, hye,Armenian,Հայերեն,7 +cym,Welsh,Cymraeg,0.5 diff --git a/src/alphabets/latin.rs b/src/alphabets/latin.rs index 9a1c1c8..846a590 100644 --- a/src/alphabets/latin.rs +++ b/src/alphabets/latin.rs @@ -10,6 +10,7 @@ const AKA: &str = "abdefghiklmnoprstuwyɔɛ"; const AZE: &str = "abcdefghijklmnopqrstuvxyzçöüğışə̇"; const CAT: &str = "abcdefghijklmnopqrstuvwxyz·àçèéíïòóúü"; const CES: &str = "abcdefghijklmnopqrstuvwxyzáéíóúýčďěňřšťůž"; +const CYM: &str = "abcdefghijklmnopqrstuvwxyzàáâäèéêëìíîïòóôöùúûüýÿŵŷẁẃẅỳ"; const DAN: &str = "abcdefghijklmnopqrstuvwxyzåæø"; const DEU: &str = "abcdefghijklmnopqrstuvwxyzßäöü"; const ENG: &str = "abcdefghijklmnopqrstuvwxyz"; @@ -49,6 +50,7 @@ const LATIN_ALPHABETS: &[(Lang, &str)] = &[ (Lang::Aze, AZE), (Lang::Cat, CAT), (Lang::Ces, CES), + (Lang::Cym, CYM), (Lang::Dan, DAN), (Lang::Deu, DEU), (Lang::Eng, ENG), @@ -163,8 +165,8 @@ mod tests { let outcome 
= alphabet_calculate_scores(&text, &filter); assert_eq!(outcome.count, 50); - assert_eq!(outcome.raw_scores.len(), 36); - assert_eq!(outcome.scores.len(), 36); + assert_eq!(outcome.raw_scores.len(), 37); + assert_eq!(outcome.scores.len(), 37); let raw_scores_for = |lang: Lang| { outcome diff --git a/src/core/detect.rs b/src/core/detect.rs index 52b9caa..3014a66 100644 --- a/src/core/detect.rs +++ b/src/core/detect.rs @@ -140,6 +140,7 @@ mod tests { Lang::Swe, Lang::Nob, Lang::Tgl, + Lang::Cym, ]); let options = Options::new().set_filter_list(filter_list); let output = detect_with_options(text, &options); diff --git a/src/lang.rs b/src/lang.rs index ba84ff3..b91f15b 100644 --- a/src/lang.rs +++ b/src/lang.rs @@ -223,9 +223,12 @@ pub enum Lang { /// Հայերեն (Armenian) Hye = 68, + + /// Cymraeg (Welsh) + Cym = 69, } -const VALUES: [Lang; 69] = [ +const VALUES: [Lang; 70] = [ Lang::Epo, Lang::Eng, Lang::Rus, @@ -295,6 +298,7 @@ const VALUES: [Lang; 69] = [ Lang::Cat, Lang::Tgl, Lang::Hye, + Lang::Cym, ]; fn lang_from_code<S: Into<String>>(code: S) -> Option<Lang> { @@ -368,6 +372,7 @@ fn lang_from_code<S: Into<String>>(code: S) -> Option<Lang> { "cat" => Some(Lang::Cat), "tgl" => Some(Lang::Tgl), "hye" => Some(Lang::Hye), + "cym" => Some(Lang::Cym), _ => None, } } @@ -443,6 +448,7 @@ fn lang_to_code(lang: Lang) -> &'static str { Lang::Cat => "cat", Lang::Tgl => "tgl", Lang::Hye => "hye", + Lang::Cym => "cym", } } @@ -517,6 +523,7 @@ fn lang_to_name(lang: Lang) -> &'static str { Lang::Cat => "Català", Lang::Tgl => "Tagalog", Lang::Hye => "Հայերեն", + Lang::Cym => "Cymraeg", } } @@ -591,6 +598,7 @@ fn lang_to_eng_name(lang: Lang) -> &'static str { Lang::Cat => "Catalan", Lang::Tgl => "Tagalog", Lang::Hye => "Armenian", + Lang::Cym => "Welsh", } } @@ -700,7 +708,7 @@ mod tests { #[test] fn test_all() { - assert_eq!(Lang::all().len(), 69); + assert_eq!(Lang::all().len(), 70); let all = Lang::all(); assert!(all.contains(&Lang::Ukr)); assert!(all.contains(&Lang::Swe)); diff --git a/src/scripts/lang_mapping.rs 
b/src/scripts/lang_mapping.rs index 80ea329..5ebb148 100644 --- a/src/scripts/lang_mapping.rs +++ b/src/scripts/lang_mapping.rs @@ -1,7 +1,7 @@ use super::Script; use crate::Lang; -const LATIN_LANGS: [Lang; 36] = [ +const LATIN_LANGS: [Lang; 37] = [ Lang::Spa, Lang::Eng, Lang::Por, @@ -38,6 +38,7 @@ const LATIN_LANGS: [Lang; 36] = [ Lang::Lav, Lang::Est, Lang::Lat, + Lang::Cym, ]; const CYRILLIC_LANGS: [Lang; 6] = [ Lang::Rus, diff --git a/src/trigrams/profiles.rs b/src/trigrams/profiles.rs index 38e3a95..137ae4e 100644 --- a/src/trigrams/profiles.rs +++ b/src/trigrams/profiles.rs @@ -10989,6 +10989,311 @@ pub static LATIN_LANGS: LangProfileList = &[ Trigram('c', 'e', 's'), ], ), + ( + Lang::Cym, + &[ + Trigram('y', 'n', ' '), + Trigram(' ', 'y', 'n'), + Trigram('d', 'd', ' '), + Trigram(' ', 'm', 'a'), + Trigram('a', 'e', ' '), + Trigram('m', 'a', 'e'), + Trigram('a', 'u', ' '), + Trigram(' ', 'y', ' '), + Trigram('d', ' ', 'y'), + Trigram('e', 'd', 'd'), + Trigram(' ', 'r', ' '), + Trigram('y', 'd', 'd'), + Trigram(' ', 'a', 'r'), + Trigram(' ', 'i', ' '), + Trigram('n', ' ', 'y'), + Trigram(' ', 'o', ' '), + Trigram(' ', 'c', 'y'), + Trigram('t', 'h', ' '), + Trigram(' ', 'g', 'w'), + Trigram('d', 'd', 'i'), + Trigram('e', 't', 'h'), + Trigram('o', 'e', 'd'), + Trigram('o', 'l', ' '), + Trigram('a', 'r', ' '), + Trigram(' ', 'g', 'y'), + Trigram(' ', 'd', 'd'), + Trigram('w', 'y', 'd'), + Trigram(' ', 'e', 'i'), + Trigram(' ', 'n', ' '), + Trigram(' ', 'a', ' '), + Trigram('y', 'd', ' '), + Trigram('o', 'd', 'd'), + Trigram(' ', 'g', 'a'), + Trigram('a', 'e', 't'), + Trigram('a', 'n', ' '), + Trigram(' ', 'r', 'h'), + Trigram('i', 'a', 'd'), + Trigram('i', 'o', ' '), + Trigram('n', ' ', 'a'), + Trigram('e', 'i', ' '), + Trigram('y', 'r', ' '), + Trigram('w', 'n', ' '), + Trigram('n', ' ', 'c'), + Trigram(' ', 'l', 'l'), + Trigram(' ', 'c', 'a'), + Trigram('n', ' ', 'g'), + Trigram('d', 'i', ' '), + Trigram('w', 'e', 'd'), + Trigram(' ', 'm', 'e'), + Trigram('o', 
'd', ' '), + Trigram('e', 'l', ' '), + Trigram('n', ' ', 'd'), + Trigram('e', 'd', 'i'), + Trigram('r', ' ', 'y'), + Trigram('i', 't', 'h'), + Trigram(' ', 'w', 'e'), + Trigram('a', 'd', ' '), + Trigram(' ', 'f', 'e'), + Trigram('e', 'r', ' '), + Trigram('r', ' ', 'a'), + Trigram('d', 'a', 'u'), + Trigram(' ', 'd', 'a'), + Trigram(' ', 'a', 'm'), + Trigram('d', ' ', 'a'), + Trigram('o', 'n', ' '), + Trigram('c', 'h', ' '), + Trigram('l', ' ', 'y'), + Trigram('d', 'd', 'o'), + Trigram(' ', 'h', 'e'), + Trigram(' ', 'c', 'h'), + Trigram('r', 'o', 'e'), + Trigram(' ', 'h', 'y'), + Trigram('e', ' ', 'r'), + Trigram(' ', 'd', 'i'), + Trigram('y', 'n', 'n'), + Trigram(' ', 'y', 'r'), + Trigram('d', 'd', 'a'), + Trigram('r', ' ', 'g'), + Trigram('g', 'a', 'n'), + Trigram('i', 'r', ' '), + Trigram('e', 'w', 'n'), + Trigram(' ', 'r', 'o'), + Trigram('e', 'n', ' '), + Trigram(' ', 'd', 'y'), + Trigram('f', 'o', 'd'), + Trigram(' ', 'f', 'f'), + Trigram('i', 'a', 'u'), + Trigram('l', 'l', ' '), + Trigram('m', 'e', 'w'), + Trigram(' ', 'y', 'm'), + Trigram(' ', 'd', 'e'), + Trigram('i', 'd', ' '), + Trigram(' ', 's', 'y'), + Trigram('y', 'w', ' '), + Trigram('d', 'i', 'a'), + Trigram('h', 'y', 'n'), + Trigram('f', 'y', 'd'), + Trigram('i', ' ', 'g'), + Trigram(' ', 'u', 'n'), + Trigram('e', 'u', ' '), + Trigram('i', ' ', 'd'), + Trigram('n', 'o', 'l'), + Trigram('l', 'l', 'a'), + Trigram('u', ' ', 'a'), + Trigram('e', 'i', 't'), + Trigram(' ', 'a', 'c'), + Trigram('d', 'o', 'l'), + Trigram('i', ' ', 'r'), + Trigram('w', 'y', ' '), + Trigram('d', 'i', 'o'), + Trigram('c', 'y', 'n'), + Trigram('f', 'e', 'l'), + Trigram(' ', 'n', 'i'), + Trigram('o', ' ', 'r'), + Trigram('i', 'd', 'd'), + Trigram('r', 't', 'h'), + Trigram(' ', 'g', 'o'), + Trigram('l', ' ', 'a'), + Trigram('a', 'i', ' '), + Trigram('e', 'f', 'y'), + Trigram('d', 'y', 'n'), + Trigram(' ', 'b', 'o'), + Trigram('r', 'h', 'a'), + Trigram('e', 'd', ' '), + Trigram(' ', 'd', 'r'), + Trigram('r', 'w', 'y'), + 
Trigram('a', 'd', 'a'), + Trigram('n', ' ', 'f'), + Trigram('w', 'y', 'r'), + Trigram('f', 'e', 'r'), + Trigram('a', 'c', ' '), + Trigram('n', ' ', 'e'), + Trigram('r', 'd', 'd'), + Trigram('a', 'i', 'd'), + Trigram('a', 'e', 'l'), + Trigram('a', 'l', 'l'), + Trigram('n', 't', ' '), + Trigram('i', 'o', 'n'), + Trigram(' ', 't', 'r'), + Trigram('n', 'y', 'd'), + Trigram('a', 'c', 'h'), + Trigram('g', 'y', 'f'), + Trigram('c', 'y', 'f'), + Trigram('r', ' ', 'd'), + Trigram('i', 'g', ' '), + Trigram('h', ' ', 'y'), + Trigram('c', 'h', 'w'), + Trigram('e', 'l', 'l'), + Trigram('n', ' ', 'b'), + Trigram('d', ' ', 'e'), + Trigram('n', ' ', 'o'), + Trigram(' ', 'b', 'y'), + Trigram(' ', 'n', 'e'), + Trigram('d', 'a', ' '), + Trigram(' ', 'b', 'e'), + Trigram('h', 'a', 'n'), + Trigram('n', 'i', 'a'), + Trigram(' ', 'o', 'e'), + Trigram('d', ' ', 'o'), + Trigram('r', ' ', 'c'), + Trigram('d', ' ', 'g'), + Trigram('d', 'd', 'e'), + Trigram('r', ' ', 'o'), + Trigram('a', 'f', ' '), + Trigram('a', 'r', 'a'), + Trigram('n', 'i', ' '), + Trigram('n', ' ', 's'), + Trigram(' ', 'p', 'e'), + Trigram('l', 'w', 'y'), + Trigram('g', 'w', 'e'), + Trigram('i', ' ', 'a'), + Trigram('w', 'r', ' '), + Trigram(' ', 'b', 'r'), + Trigram('i', 'n', ' '), + Trigram('g', 'o', 'l'), + Trigram(' ', 'g', 'e'), + Trigram('r', 'c', 'h'), + Trigram('h', 'e', 'f'), + Trigram(' ', 'a', 'd'), + Trigram('n', 'o', 'd'), + Trigram('n', 'n', 'a'), + Trigram('g', 'y', 'd'), + Trigram(' ', 'f', 'a'), + Trigram('u', 'n', ' '), + Trigram('d', ' ', 'h'), + Trigram(' ', 'y', 's'), + Trigram('d', ' ', 'i'), + Trigram('y', ' ', 'd'), + Trigram('e', ' ', 'n'), + Trigram('r', 'i', 'a'), + Trigram('e', 's', ' '), + Trigram(' ', 'a', 'n'), + Trigram('d', 'w', 'y'), + Trigram('a', 'm', ' '), + Trigram('y', 's', 'g'), + Trigram('y', ' ', 'g'), + Trigram('w', 'y', 'n'), + Trigram('u', ' ', 'c'), + Trigram('l', ' ', 'e'), + Trigram('i', ' ', 'f'), + Trigram('g', 'w', 'y'), + Trigram('e', 'f', 'n'), + Trigram('d', 'd', 'y'), 
+ Trigram('y', ' ', 'c'), + Trigram('d', 'i', 'g'), + Trigram('w', 'y', 's'), + Trigram(' ', 'e', 'u'), + Trigram('y', 'd', 'a'), + Trigram('n', ' ', 'h'), + Trigram('y', 'c', 'h'), + Trigram('t', 'h', 'i'), + Trigram('a', 'n', 't'), + Trigram(' ', 'y', 'w'), + Trigram('w', 'e', 'i'), + Trigram(' ', 'b', 'a'), + Trigram('d', ' ', 'c'), + Trigram('n', ' ', 'n'), + Trigram('s', ' ', 'y'), + Trigram('y', 's', 't'), + Trigram('r', 'y', 'd'), + Trigram('n', 'a', ' '), + Trigram('o', ' ', 'a'), + Trigram('i', ' ', 'n'), + Trigram('n', ' ', 'm'), + Trigram('u', ' ', 'g'), + Trigram('d', ' ', 'd'), + Trigram('l', 'a', 'w'), + Trigram('i', ' ', 'w'), + Trigram('n', ' ', 'i'), + Trigram('n', ' ', 'r'), + Trigram(' ', 'f', 'o'), + Trigram('y', 's', ' '), + Trigram('i', 'a', 'e'), + Trigram(' ', 'c', 'o'), + Trigram('d', 'o', ' '), + Trigram('l', 'i', 'a'), + Trigram('r', 'e', 'd'), + Trigram('n', 'd', ' '), + Trigram('y', ' ', 'n'), + Trigram('h', 'a', 'u'), + Trigram(' ', 'h', 'a'), + Trigram('n', 'e', 'u'), + Trigram('u', ' ', 'y'), + Trigram('r', 'h', 'y'), + Trigram('u', ' ', 'r'), + Trigram('b', 'o', 'd'), + Trigram(' ', 'p', 'r'), + Trigram(' ', 'c', 'e'), + Trigram('r', 'a', 'e'), + Trigram('g', 'o', 'r'), + Trigram('e', 'n', 'n'), + Trigram('g', 'w', 'a'), + Trigram(' ', 'p', 'a'), + Trigram('i', ' ', 'c'), + Trigram(' ', 'e', 'r'), + Trigram('l', 'y', 'n'), + Trigram('r', 'a', 'i'), + Trigram('r', 'i', 'f'), + Trigram('i', 'a', 'n'), + Trigram('l', 'l', 'i'), + Trigram('n', 'a', 'u'), + Trigram('r', ' ', 'h'), + Trigram('l', 'a', 'n'), + Trigram('n', 'w', 'y'), + Trigram('y', 'f', 'e'), + Trigram('t', 'h', 'a'), + Trigram('r', ' ', 'e'), + Trigram('d', ' ', 'm'), + Trigram('d', 'i', 'w'), + Trigram('o', 's', ' '), + Trigram('l', 'l', 'e'), + Trigram('a', 'n', 'g'), + Trigram(' ', 's', 'e'), + Trigram('d', 'd', 'w'), + Trigram('a', 'l', ' '), + Trigram('l', 'a', 'd'), + Trigram('o', ' ', 'g'), + Trigram('c', 'a', 'e'), + Trigram('a', 'n', 'n'), + Trigram('o', 'l', 
'i'), + Trigram('a', ' ', 'r'), + Trigram('r', ' ', 'b'), + Trigram('r', 'i', 'o'), + Trigram('h', 'y', 'd'), + Trigram('a', 'i', 't'), + Trigram('a', 'e', 'n'), + Trigram('u', ' ', 'd'), + Trigram('n', 'o', ' '), + Trigram('d', ' ', 'b'), + Trigram(' ', 's', 'i'), + Trigram('f', 'a', 'n'), + Trigram('l', 'l', 'y'), + Trigram('u', ' ', 'h'), + Trigram('o', ' ', 'd'), + Trigram('i', ' ', 'b'), + Trigram('d', 'a', 'r'), + Trigram('s', 'g', 'o'), + Trigram('y', 'n', 'g'), + Trigram('d', 'o', 'd'), + Trigram('u', ' ', 'n'), + ], + ), ]; /// Languages for script Cyrillic