From ae3a17c56e9c8fa6dfec5ed036616b26dd765057 Mon Sep 17 00:00:00 2001 From: ppisljar Date: Sat, 5 Aug 2023 19:15:42 +0200 Subject: [PATCH 1/4] allows installing as python library with pip --- .gitignore | 2 ++ MANIFEST.in | 1 + README.md | 16 ++++++++++++++++ requirements.txt | 3 +++ setup.py | 11 +++++++++++ slovene_g2p.egg-info/PKG-INFO | 4 ++++ slovene_g2p.egg-info/SOURCES.txt | 14 ++++++++++++++ slovene_g2p.egg-info/dependency_links.txt | 1 + slovene_g2p.egg-info/top_level.txt | 1 + SloveneG2P.py => slovene_g2p/SloveneG2P.py | 9 +++++---- slovene_g2p/__init__.py | 0 .../resources}/SloveneG2P_phoneme_set.json | 0 .../resources}/schwa_rules.tsv | 0 .../resources}/table_of_consonant_phonemes.tsv | 0 .../table_of_obstruent_conversions.tsv | 0 .../resources}/table_of_other_symbols.tsv | 0 .../resources}/table_of_vowel_phonemes.tsv | 0 17 files changed, 58 insertions(+), 4 deletions(-) create mode 100644 .gitignore create mode 100644 MANIFEST.in create mode 100644 requirements.txt create mode 100644 setup.py create mode 100644 slovene_g2p.egg-info/PKG-INFO create mode 100644 slovene_g2p.egg-info/SOURCES.txt create mode 100644 slovene_g2p.egg-info/dependency_links.txt create mode 100644 slovene_g2p.egg-info/top_level.txt rename SloveneG2P.py => slovene_g2p/SloveneG2P.py (97%) create mode 100644 slovene_g2p/__init__.py rename {resources => slovene_g2p/resources}/SloveneG2P_phoneme_set.json (100%) rename {resources => slovene_g2p/resources}/schwa_rules.tsv (100%) rename {resources => slovene_g2p/resources}/table_of_consonant_phonemes.tsv (100%) rename {resources => slovene_g2p/resources}/table_of_obstruent_conversions.tsv (100%) rename {resources => slovene_g2p/resources}/table_of_other_symbols.tsv (100%) rename {resources => slovene_g2p/resources}/table_of_vowel_phonemes.tsv (100%) diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..b2b5f29 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +slovene_g2p.egg-info +build \ No newline at end of file diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..e6fcb18 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1 @@ +recursive-include slovene_g2p/resources/ * \ No newline at end of file diff --git a/README.md b/README.md index c001bd9..5ceb362 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,18 @@ # slovene_g2p A converter that converts Slovene words to their IPA and/or SAMPA transcriptions. + +## installation + +pip install . + +## usage + +``` +from slovene_g2p.SloveneG2P import SloveneG2P +g2p = SloveneG2P("ipa_symbol", "cjvt_ipa_detailed_representation", "phoneme_string") +g2p.convert_to_phonetic_transcription(word="govoriti", msd_sl="Ggdd-em", morphological_pattern_code="G1.2.d") +``` + +phoneme_option can be either "ipa_symbol" or "sampa_symbol" and representation option can be either "cjvt_ipa_detailed_representation", "cjvt_ipa_robust_representation", "cjvt_sampa_detailed_representation", "cjvt_sampa_robust_representation" + +both msd_sl and morphological_pattern_code are available in sloleks 3.0 and provided by classla python package \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..f349118 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,3 @@ +nltk>=3.6.7 +classla>=1.1.0 +reldi-tokeniser>=1.0.1 diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..1fc51dc --- /dev/null +++ b/setup.py @@ -0,0 +1,11 @@ +from setuptools import setup, find_packages + +setup( + name='slovene_g2p', + version='0.1', + packages=find_packages(), + install_requires=[ + # Add any required external dependencies here + ], + include_package_data=True, +) \ No newline at end of file diff --git a/slovene_g2p.egg-info/PKG-INFO b/slovene_g2p.egg-info/PKG-INFO new file mode 100644 index 0000000..406c0fd --- /dev/null +++ b/slovene_g2p.egg-info/PKG-INFO @@ -0,0 +1,4 @@ +Metadata-Version: 2.1 +Name: slovene-g2p +Version: 0.1 +License-File: LICENSE diff --git a/slovene_g2p.egg-info/SOURCES.txt b/slovene_g2p.egg-info/SOURCES.txt new file mode 100644 index 0000000..aaf4b45 --- /dev/null +++ b/slovene_g2p.egg-info/SOURCES.txt @@ -0,0 +1,14 @@ +LICENSE +MANIFEST.in +README.md +setup.py +slovene_g2p.egg-info/PKG-INFO +slovene_g2p.egg-info/SOURCES.txt +slovene_g2p.egg-info/dependency_links.txt +slovene_g2p.egg-info/top_level.txt +slovene_g2p/resources/SloveneG2P_phoneme_set.json +slovene_g2p/resources/schwa_rules.tsv +slovene_g2p/resources/table_of_consonant_phonemes.tsv +slovene_g2p/resources/table_of_obstruent_conversions.tsv +slovene_g2p/resources/table_of_other_symbols.tsv +slovene_g2p/resources/table_of_vowel_phonemes.tsv \ No newline at end of file diff --git a/slovene_g2p.egg-info/dependency_links.txt b/slovene_g2p.egg-info/dependency_links.txt new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/slovene_g2p.egg-info/dependency_links.txt @@ -0,0 +1 @@ + diff --git a/slovene_g2p.egg-info/top_level.txt b/slovene_g2p.egg-info/top_level.txt new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/slovene_g2p.egg-info/top_level.txt @@ -0,0 +1 @@ + diff --git a/SloveneG2P.py b/slovene_g2p/SloveneG2P.py similarity index 97% rename from SloveneG2P.py rename to slovene_g2p/SloveneG2P.py index 012a645..9630c1e 100644 --- a/SloveneG2P.py +++ b/slovene_g2p/SloveneG2P.py @@ -1,12 +1,14 @@ import json +import os from collections import defaultdict as dd +current_folder = os.path.dirname(__file__) class SloveneG2P: def __init__(self, representation_option, phoneme_set_option, output_option): - self.phoneme_set_file_path = "./resources/SloveneG2P_phoneme_set.json" - self.conversion_file_path = "./resources/table_of_obstruent_conversions.tsv" + self.phoneme_set_file_path = os.path.join(current_folder, "resources/SloveneG2P_phoneme_set.json") + self.conversion_file_path = os.path.join(current_folder, "resources/table_of_obstruent_conversions.tsv") self.representation_option = representation_option self.phoneme_set_option = phoneme_set_option @@ -32,7 +34,7 @@ def __init__(self, representation_option, phoneme_set_option, output_option): # GET LIST OF SCHWA RULES self.set_schwa_combinations = set() - file_with_schwa_rules = open("./resources/schwa_rules.tsv", "r", encoding="UTF-8").readlines() + file_with_schwa_rules = open(os.path.join(current_folder, "resources/schwa_rules.tsv"), "r", encoding="UTF-8").readlines() for line in file_with_schwa_rules: all_info = line.strip("\n").split("\t") morph_code = all_info[0] @@ -40,7 +42,6 @@ def __init__(self, representation_option, phoneme_set_option, output_option): relevant_msds = all_info[2] for relevant_msd in relevant_msds.split(", "): schwa_combination = f"{morph_code} ~ {relevant_msd}" - print(schwa_combination) self.set_schwa_combinations.add(schwa_combination) # RESOURCE FUNCTION - LIST OF VOWEL GRAPHEMES diff --git a/slovene_g2p/__init__.py b/slovene_g2p/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/resources/SloveneG2P_phoneme_set.json b/slovene_g2p/resources/SloveneG2P_phoneme_set.json similarity index 100% rename from resources/SloveneG2P_phoneme_set.json rename to slovene_g2p/resources/SloveneG2P_phoneme_set.json diff --git a/resources/schwa_rules.tsv b/slovene_g2p/resources/schwa_rules.tsv similarity index 100% rename from resources/schwa_rules.tsv rename to slovene_g2p/resources/schwa_rules.tsv diff --git a/resources/table_of_consonant_phonemes.tsv b/slovene_g2p/resources/table_of_consonant_phonemes.tsv similarity index 100% rename from resources/table_of_consonant_phonemes.tsv rename to slovene_g2p/resources/table_of_consonant_phonemes.tsv diff --git a/resources/table_of_obstruent_conversions.tsv b/slovene_g2p/resources/table_of_obstruent_conversions.tsv similarity index 100% rename from resources/table_of_obstruent_conversions.tsv rename to slovene_g2p/resources/table_of_obstruent_conversions.tsv diff --git a/resources/table_of_other_symbols.tsv b/slovene_g2p/resources/table_of_other_symbols.tsv similarity index 100% rename from resources/table_of_other_symbols.tsv rename to slovene_g2p/resources/table_of_other_symbols.tsv diff --git a/resources/table_of_vowel_phonemes.tsv b/slovene_g2p/resources/table_of_vowel_phonemes.tsv similarity index 100% rename from resources/table_of_vowel_phonemes.tsv rename to slovene_g2p/resources/table_of_vowel_phonemes.tsv From 1f91e4d08b2a16ac0bbd6ee4cd0e041f0d2abcae Mon Sep 17 00:00:00 2001 From: ppisljar Date: Mon, 22 Jan 2024 19:09:53 +0100 Subject: [PATCH 2/4] updating package --- .gitignore | 6 ++++-- MANIFEST.in | 2 +- README.md | 2 -- pyproject.toml | 3 +++ setup.py | 21 ++++++++++++++++----- slovene_g2p/__init__.py | 1 + 6 files changed, 25 insertions(+), 10 deletions(-) create mode 100644 pyproject.toml diff --git a/.gitignore b/.gitignore index b2b5f29..21c0633 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,4 @@ -slovene_g2p.egg-info -build \ No newline at end of file +slovene_g2p.egg-info/* +slovene_g2p/slovene_g2p.egg-info/* +build +dist diff --git a/MANIFEST.in b/MANIFEST.in index e6fcb18..3a01181 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1 +1 @@ -recursive-include slovene_g2p/resources/ * \ No newline at end of file +recursive-include slovene_g2p/src/resources/ * \ No newline at end of file diff --git a/README.md b/README.md index 5ceb362..b298c46 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,7 @@ # slovene_g2p A converter that converts Slovene words to their IPA and/or SAMPA transcriptions. -## installation -pip install . ## usage diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..8cf3256 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,3 @@ +[build-system] +requires = ["setuptools>=61.0"] +build-backend = "setuptools.build_meta" \ No newline at end of file diff --git a/setup.py b/setup.py index 1fc51dc..3122544 100644 --- a/setup.py +++ b/setup.py @@ -1,11 +1,22 @@ from setuptools import setup, find_packages +import os + +cwd = os.path.dirname(os.path.abspath(__file__)) + +requirements = open(os.path.join(cwd, "requirements.txt"), "r").readlines() + +with open("README.md", "r", encoding="utf-8") as readme_file: + README = readme_file.read() + setup( name='slovene_g2p', - version='0.1', - packages=find_packages(), - install_requires=[ - # Add any required external dependencies here - ], + version='0.0.2', + author = "Peter Pisljar", + description = "rule based slovenian g2p", + long_description=README, + install_requires=requirements, + packages=find_packages('slovene_g2p'), + python_requires=">=3.8.0, <3.12", include_package_data=True, ) \ No newline at end of file diff --git a/slovene_g2p/__init__.py b/slovene_g2p/__init__.py index e69de29..c620b44 100644 --- a/slovene_g2p/__init__.py +++ b/slovene_g2p/__init__.py @@ -0,0 +1 @@ +from .SloveneG2P import SloveneG2P \ No newline at end of file From 6ec48fe639695104196ebc015676d1b59d130366 Mon Sep 17 00:00:00 2001 From: ppisljar Date: Wed, 24 Jan 2024 21:05:20 +0100 Subject: [PATCH 3/4] cleanup --- MANIFEST.in | 2 +- setup.py | 4 +-- slovene_g2p.egg-info/PKG-INFO | 26 +++++++++++++++++- slovene_g2p.egg-info/SOURCES.txt | 4 +++ slovene_g2p.egg-info/top_level.txt | 2 +- slovene_g2p/SloveneG2P.py | 13 +++++++++ .../__pycache__/SloveneG2P.cpython-310.pyc | Bin 0 -> 16069 bytes .../__pycache__/__init__.cpython-310.pyc | Bin 0 -> 187 bytes 8 files changed, 46 insertions(+), 5 deletions(-) create mode 100644 slovene_g2p/__pycache__/SloveneG2P.cpython-310.pyc create mode 100644 slovene_g2p/__pycache__/__init__.cpython-310.pyc diff --git a/MANIFEST.in b/MANIFEST.in index 3a01181..e6fcb18 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1 +1 @@ -recursive-include slovene_g2p/src/resources/ * \ No newline at end of file +recursive-include slovene_g2p/resources/ * \ No newline at end of file diff --git a/setup.py b/setup.py index 3122544..cdab3d6 100644 --- a/setup.py +++ b/setup.py @@ -11,12 +11,12 @@ setup( name='slovene_g2p', - version='0.0.2', + version='0.0.7', author = "Peter Pisljar", description = "rule based slovenian g2p", long_description=README, install_requires=requirements, - packages=find_packages('slovene_g2p'), + packages=find_packages(), python_requires=">=3.8.0, <3.12", include_package_data=True, ) \ No newline at end of file diff --git a/slovene_g2p.egg-info/PKG-INFO b/slovene_g2p.egg-info/PKG-INFO index 406c0fd..5b97cc5 100644 --- a/slovene_g2p.egg-info/PKG-INFO +++ b/slovene_g2p.egg-info/PKG-INFO @@ -1,4 +1,28 @@ Metadata-Version: 2.1 Name: slovene-g2p -Version: 0.1 +Version: 0.0.7 +Summary: rule based slovenian g2p +Home-page: UNKNOWN +Author: Peter Pisljar +License: UNKNOWN +Platform: UNKNOWN +Requires-Python: >=3.8.0, <3.12 License-File: LICENSE + +# slovene_g2p +A converter that converts Slovene words to their IPA and/or SAMPA transcriptions. + + + +## usage + +``` +from slovene_g2p.SloveneG2P import SloveneG2P +g2p = SloveneG2P("ipa_symbol", "cjvt_ipa_detailed_representation", "phoneme_string") +g2p.convert_to_phonetic_transcription(word="govoriti", msd_sl="Ggdd-em", morphological_pattern_code="G1.2.d") +``` + +phoneme_option can be either "ipa_symbol" or "sampa_symbol" and representation option can be either "cjvt_ipa_detailed_representation", "cjvt_ipa_robust_representation", "cjvt_sampa_detailed_representation", "cjvt_sampa_robust_representation" + +both msd_sl and morphological_pattern_code are available in sloleks 3.0 and provided by classla python package + diff --git a/slovene_g2p.egg-info/SOURCES.txt b/slovene_g2p.egg-info/SOURCES.txt index aaf4b45..65178c7 100644 --- a/slovene_g2p.egg-info/SOURCES.txt +++ b/slovene_g2p.egg-info/SOURCES.txt @@ -1,10 +1,14 @@ LICENSE MANIFEST.in README.md +pyproject.toml setup.py +slovene_g2p/SloveneG2P.py +slovene_g2p/__init__.py slovene_g2p.egg-info/PKG-INFO slovene_g2p.egg-info/SOURCES.txt slovene_g2p.egg-info/dependency_links.txt +slovene_g2p.egg-info/requires.txt slovene_g2p.egg-info/top_level.txt slovene_g2p/resources/SloveneG2P_phoneme_set.json slovene_g2p/resources/schwa_rules.tsv diff --git a/slovene_g2p.egg-info/top_level.txt b/slovene_g2p.egg-info/top_level.txt index 8b13789..c560697 100644 --- a/slovene_g2p.egg-info/top_level.txt +++ b/slovene_g2p.egg-info/top_level.txt @@ -1 +1 @@ - +slovene_g2p diff --git a/slovene_g2p/SloveneG2P.py b/slovene_g2p/SloveneG2P.py index 9630c1e..74e2737 100644 --- a/slovene_g2p/SloveneG2P.py +++ b/slovene_g2p/SloveneG2P.py @@ -6,6 +6,19 @@ class SloveneG2P: + def __init__(self): + self.ipa_converter = SloveneG2PBase("ipa_symbol", "cjvt_ipa_detailed_representation", "phoneme_string") + self.sampa_converter = SloveneG2PBase("sampa_symbol", "cjvt_sampa_detailed_representation", "phoneme_string") + + def ipa(self, word, msd, mpc): + return self.ipa_converter.convert_to_phonetic_transcription(word, msd, mpc) + + def sampa(self, word, msd, mpc): + return self.sampa_converter.convert_to_phonetic_transcription(word, msd, mpc) + + +class SloveneG2PBase: + def __init__(self, representation_option, phoneme_set_option, output_option): self.phoneme_set_file_path = os.path.join(current_folder, "resources/SloveneG2P_phoneme_set.json") self.conversion_file_path = os.path.join(current_folder, "resources/table_of_obstruent_conversions.tsv") diff --git a/slovene_g2p/__pycache__/SloveneG2P.cpython-310.pyc b/slovene_g2p/__pycache__/SloveneG2P.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..20b23c24bfb8a5d48e17c21c6bbfdf6b0bff07b0 GIT binary patch literal 16069 zcmc&*Ymi(=b-u5exjQ@iUd`@GD{1xC>ans`@&hA;jcpklkc}m5EG)*O?b+2xvopJW zN0PnVF>+*r6L1jW5sZQC0!b_|kYGrF1d@s&DF=#IoClYXP?ZGYq$;FJ#g8ZzobT(t zciwAD_(yhYPCrhc)2B~&pYA^0x75{@H1PNA`EQGp|6~~d%tZa4jl>Q3dDjC7Lzo5Q zhz`voCel{HnzfHOrjavG#1`YZ#FBZ$z10x5a2_#)bIv-_A9LgEJeyiWq;3|4W(5~i>oF^<@p=@Cc7L>%duNC+2UTy%&e!i4A) zDTJ=*65R+pM33l2m=t}YA7Q5$5Q7L)VueT}>=Hv_7-6><5i1e)h*e?~VXs&%)*$Q? zYsDDCez8uhM>rrhh>Zvb#U`;C;R>+@4ci(d4;4zMbH&`PlXp%z5Ig!yAriyyCj7jI zOv5M}C(K2wYzuSA@{NpBO>aPW((GD+lghB*n9lhRs2 zUTU#x$++1#M=sqc*Mq#+5>vfM^;J{-NDV9*$R9*(g^HyS8&a`h#70zXC1R^oY!tMs z@#_|`$H;FD;%n>DV~DS-ORq!X}yXN~h#h&f8V%w9MS`QZYB1%Xqo+#0jrdT-Z@7 zR?Z$PPC&53+=*W6s*E1by6T{qsA zIdtgeT^S6xU7j9$nVHGChIp-c6LSv+$$e_Jm1gJm_fs-Xv$V0WrLI&alP~7Wnasq# zLTM^n@b*vCXfBw+ikfH{@8XMmcSS2rT=CbAEfpemiF9zLHr=+CTr%MHq zlQlWeS()iP`bHHVtaVCV?h2WkgMc8! z#HQB%U|qy&`64SDSmi~X`eY|sAH?$I+^iR@04m~ESHt8Cgk(B%x|E;FiA=eqW6-nL zepCkImtVAryuHLzK>*hsCt z(FpJ!oIO@5U{vVVHfN7Ew7D5OTy1ePW@A<6`p#`GHP+JV<`P?~e3}~7TyTS8^%8|_ zOvE6~*{F@kRB85DzNqFbtU+lmR}4C2E-MQ8B8E;3hB!ZmV6Koa2X9|N4X0T`bICo&y!SVi|6ZOoZ4wiIq zK?hIj;8QwyN(Z0O!P7eUf(|~fgRcZ;HZXI6nGeiTV4hOB-}rzEo_$kkQjo(E|Kav>3DsG&Z2RQxpl7?{fjJwP#lSorm<7pwS74sdwH`&Hx&H0Q zyN3C11z1?uRC{&qT!8?wz&sY1A}|-U7QPmkGt1Y%6V<-5TK{-c{Vj9v3gqYr%(=iU z2d1ax&|UoX#{%4vIYJKD+BG?A}Bv_Y96>-!0^1jh*2SCXdy zDF|`|(*!dF#|iQTCkRdw6bNPsiUcKsIf8it?y6+FKHmbwEkvq5U*FWWHY?N=w{p=R zr!OZHXO%};%7$<9SR(Al?Duq4>@qg?zK!jwa}Q2O4*t*Crcrcv8{W_vY(n{eFMehF zoPjMax4M1Y+9FJGYwIUQ4Bt}QTi1{IiG9ZWrulV#3|mF#pw?0ss+HXU7aYmagAEdf zDvTL+DuD$f*r6xe2+PqB1`f?%(8*#Mnon$>)~lUFWTdg<$QA`~^}D3n_NcZCkqPok zM|6z@&C_cQ%5r9290ce!Q|9Okaf_BL1uF+DW-sinQ-8zmsF88qPU-)xHxlR#x7Hg{ zqD|5RXi&FWa?TL-06?&^LCbn6SDMDYeu1TwI=drv>H^Jb97DmoS=a|y*tDF=8a9+u zw5ZV^2XOSexDpAy6jZEt8q;d%KH~Zo&x=?boP*Kdp;ecm|Dy z>*|zRTX-Ti;TEj!KvZR-SYNR9gj1XN!8THOS^^~t#gJU|X!b6$nXcJyFKoOV36||u z`F8MO-*pAfO;D_YP%h%^IvqmkD!nHR_o|Sc$$a#t3ipQTy7qqWkvlo2}T7Ht7%;-VwsOC}w6#NB6CVwE`dFEP<^ z(vsW#1YBHktd{*goLu9{=ADbA^GWs+^bsfz5SOT6bvRBV*F&QUFT|u8;cC;okCd+> z*bgx7Y9q|`Ett4MKS@j-DCKKN+f(O3DnB@v3wld3KZ8?fA@a%ueY!{)PMEMy>x`Z< z@EgGCSMa;YHpX$l>owP?Bb;^CF&AFd)O%MZ?Xn#yzlzE(PucQoC>knTsW{Si-K>>= zMs~}nbvdGXr>OjMFi=XB&oK6Nf^QJ0-MIovTbq#g81w%E;6=1^Ih8hOhgEP`-GraF z5g^+6`1BTcaCrOHX+!pyxWG7M%*TbT_dwn9EhsHtPw+Z|HxL{oxP#!01a}hLMR17V zFu|Ju)RF6%FaMmiKl}K%&K^FX8a(c(5gNGOgM|XU$w7a1Y6?C{45RA#DR84`lahkL zyqAe6GZEo$p$d;@C+#4Jo7M}CwXn8se6^!$xSeRw5f4?keZZ9VZRo}2m5ojd>!jrk zhH)E{e6Mc_XV?(2lZLmR`AjJ~Q{#rz;pm)sGUiQc{7@B7z9J4?Ck`GC&*^t*qd-$| z0JlSSAyT*wT*=ETZ*E+9$ilrATNS!qpaF$hM3&N3 z%5u!T)|lzwzrm3bm24P8+*U-U()$R+!)=HG(}>%dMN^qbLzzmaZ2RUp90sAAVPVZ+ zXFX$1V^oFt)Y%5+`6_n?$G>t!V-I8ZjS=}a90g53u5LLH3Y!|kO-DjF=#5+a*kTO1 zu^CIWb!gBFyIqdt9Ki86vTPNf{uIc5&A>OW| zBfrPO{6t+o`F*GjKbC(or7OaZjrt)@#LE+$;#+@rdn;4AQ&p<9%$$MSL=4OI=5ee zx}HMK-t7Rcb}QH^GX)o_;|wY%gKNQY8vYoA>+~#3(w$nLR{3ty?6tO<3)lQ|J=fxw z;K^PsJ3m2$UZQhm#m1y})WkQ-@DiOv9wz-AnDoXXmm*yGF4^*KUr&ea*lRh=9WglF zF=-rO!T>l+Cey{Opo*6!BPLq|&B(5}nBCiZ&-KW34RXz2^bTTdZ3T{ot@pck987MVkQoyB6tf{N&#zixyhV_)Qv)cJ{ z&;stDd8>^(e>nV%6BfJsgpJKIx>H$q-0Q=_;f@}+*SOzTx7LYD3URm6<(rEgSY>eA zzL@scdOw!fB`Vr0SjAMp{NHm^?B#$)0KXI!0(5&=>?ZD5#w54 z8M?1~EfTQv??cO7P>=Y7c$+Z7!90IIa6CFJ0herp z>jds|%h08O`zmmJ70}GH8@O)**V6{q3)~Nuq3Z+gr@*nz%{&Ky`#ErfZE!12TIWrP zW0FX>!44_xz%tljgvtJ`4LDC`}}a9*phZv}R&4edIG zodtG%8|(&!^_RhJRM>X`yQvNBW`+ImGT1E&`z3!^Y&~s2r=LZ+@fI$Q*w&Jl7TXo= zzk+s$qWu0g3#a}RvKD)IM! z32yD=Op9xw9b0}d)RQguLt@>_od4Ig^vbZ`iC(s464+NXVtw5QkzR)Vlq$LQGD>!- zl6z6|`WNNdPNGIF*|HRlQ*JXH@ybSiu);%PpQ>1R{wmFja|E`vDjyTkkmllT=#jk?m zO>MAuE9?h=y{8TK%?kTTVDD{%J)*Er1N)XX*!vXr$Kl9vIGG4B9ViuwSl{eFMN5VrjF9L~q)cVJ)HSK&*} z{tDl64pjJxbFjj9oGU70{-DUgi?bG1AV$xCPDAo_IG(ISxZWT2xf9snZy;`;QBFfH z8*&X&E_I~&7$nnHKJKOcLA7(-$S~YDZt^$!W2%Pl`J2MJ;1%$>l5Ivko15#=l=~pl z{DYu?*8|dhhq;V--%-4$)t+>Vza`}O9dO(NPFuloYa5OzA9CENDB>~=-de7oRi#N> zmNcZ*(u6G*HEGC^_(+<>i%V0}Cbh+$K5riNpi!;Iaji$S!`|kPV_d-^K?`yDQJ;GS z*cg?%9q}3Tq|!Qf{UXjdS{%oN0+ximQ6)2oV{9&4lG={TCzO3hABTIMxc7vjm{yW( z_izhg8Yu zP!cnE*^+E&LSp9V(Hyok;q|t(G^+c5>*{WmpDl$A47t|oZY{}{Vwa~%9**S0nBAs& z3cEvvc{Rrv{HkiMMbgPvgZ8*?RitT_s_SX&M-}~DFIti^V9%>czJFO69&Rhc!=TMy zmJIKIv6394m@NtU6lVdf*buLol(oYd7U^N8O_Z4bMC&|J=hSGHur)Y?0pgrC4ZkF~ z{;;K0k!F6NXGf^F#QYc4%+kH=%JnUD=v8pWSPD7+4^qt2GmP}xl$CPjSkq{7y)nfN zV*vfD=S?Fw(lm2pYxK-^By)@0-W}3N$z2wTb zzeF#&^QYVN(%sA2vTYc9uKb)hv4|6c9%rg$C*cCKqMT@qV|=^wk5={kqXpedQA@^s zo&sI@T$P)`pe;3S$fv|a^$8{Zww`j-0c zV296Db|p%6$kkXMyURG2(VL=H$3eL0c4Flp0X=*ckg@|ON_CDy8+Y!*^ROcF=3NnA z9Q6nBm}~;vR*Rx4xy>Jh6OQSUN>BO|xMNr&=FVYWsP7x*4=%(G@y*Tw3<@+^V6+IpBVb-#5FW4y7{H(if1R=e&x$E3RRdN*Tt61#1i*sxw#wTo zfqf{mdpy37xOGN|ow->?ic#cr7@6D2NRYTw6lZhz7(++OML7sQK`MIKKv!>2d(DQ| z*~OWKdv3Ywt~>6!e(bj5>1-h{#_~mcUNNRG4<^R$Eab9YZVZpMb5rH9@Iqv4OFhRe zV|bfb%i^Q&;>?zb@wIX-o6<=@H?G`Fu$5pN!4856f~yFgC-_f-?-G2QK)I#=jWK$p zgLuB^`~t^){Mnd28fIT6IBN~enDATD#W_~t8!PnYCuwk)q_sg7Ln z)ND?|*{prstC<;_gV^QqUGfH`6_{rtl{U5Aqv%WAs6@LCQo#C7J*4tD)YraVd-%PT;>YY&azO^^( zn%WKQDy51H?s#Mt-gWi4LalgoFBEYK`tZi|biQ=Tt6dOvsSBd0gp9N5j%Wz%u0C%@5VO2p&T`EK=eZ>%o0Y)pTk~fqaZkvrd zg0AVj#K$cW!)`B!+p+4Kk)W%5JTGg>l`UTp@Drb@G+tqjr?OY7!=uoDMW(*~xec z{oQX5TfGRKv+>SoJamJK+HlQuEb2Rk{O>gf!yaV{cV%a(-{o+_4$E}g-56B}6&$rN z*1$)naT9lz^}Te7jiBI-9d3L>n#P=@JqTg`kg6OsDQk8)8V-N=!FabFZKwK;UBvKes7;_kM8KM{%8B&;n88n$+G6IEH zGT!0}&dD!J%}aGR3h*le=~>B8!~&$i#4mgO+|-oJME!z-%;KD^#3Fr1&-mbA7yWpk ra{Xe6&iHhr0u*+9d}dx|NqoFsLFFwDo80`A(wtN~kafi%^92|Hb;c|x literal 0 HcmV?d00001 From e7cc0966daebed6f08ad1598b7e43976e6bb3add Mon Sep 17 00:00:00 2001 From: ppisljar Date: Tue, 30 Jan 2024 19:18:01 +0100 Subject: [PATCH 4/4] adding missing cinversions --- setup.py | 2 +- slovene_g2p.egg-info/PKG-INFO | 11 +++++------ .../__pycache__/SloveneG2P.cpython-310.pyc | Bin 16069 -> 16849 bytes .../table_of_obstruent_conversions.tsv | 6 +++++- 4 files changed, 11 insertions(+), 8 deletions(-) diff --git a/setup.py b/setup.py index cdab3d6..cd73a0e 100644 --- a/setup.py +++ b/setup.py @@ -11,7 +11,7 @@ setup( name='slovene_g2p', - version='0.0.7', + version='0.0.9', author = "Peter Pisljar", description = "rule based slovenian g2p", long_description=README, diff --git a/slovene_g2p.egg-info/PKG-INFO b/slovene_g2p.egg-info/PKG-INFO index 5b97cc5..d603c73 100644 --- a/slovene_g2p.egg-info/PKG-INFO +++ b/slovene_g2p.egg-info/PKG-INFO @@ -1,13 +1,13 @@ Metadata-Version: 2.1 -Name: slovene-g2p -Version: 0.0.7 +Name: slovene_g2p +Version: 0.0.9 Summary: rule based slovenian g2p -Home-page: UNKNOWN Author: Peter Pisljar -License: UNKNOWN -Platform: UNKNOWN Requires-Python: >=3.8.0, <3.12 License-File: LICENSE +Requires-Dist: nltk>=3.6.7 +Requires-Dist: classla>=1.1.0 +Requires-Dist: reldi-tokeniser>=1.0.1 # slovene_g2p A converter that converts Slovene words to their IPA and/or SAMPA transcriptions. @@ -25,4 +25,3 @@ g2p.convert_to_phonetic_transcription(word="govoriti", msd_sl="Ggdd-em", morphol phoneme_option can be either "ipa_symbol" or "sampa_symbol" and representation option can be either "cjvt_ipa_detailed_representation", "cjvt_ipa_robust_representation", "cjvt_sampa_detailed_representation", "cjvt_sampa_robust_representation" both msd_sl and morphological_pattern_code are available in sloleks 3.0 and provided by classla python package - diff --git a/slovene_g2p/__pycache__/SloveneG2P.cpython-310.pyc b/slovene_g2p/__pycache__/SloveneG2P.cpython-310.pyc index 20b23c24bfb8a5d48e17c21c6bbfdf6b0bff07b0..d8ab703ff0629f95bb054b565982253dfd250993 100644 GIT binary patch delta 4257 zcma)X0D=TlorL&3?RiIT0x6alwv8O;|Px##c^g}z-}}-e zyG01B#+rkVH=6 zwVyATVWz{-qI+g+oO#3vdw-cb7>wwVWH6YEkz&XmR>H*7xn*s*x$=YUqzm(Lv6wJ! z8?l{W<7`|Rw?Aedvdyr;fWq^{*{(I|83!*U7t^sf&Q;SMGk`>xd#KPt5vuK^6@C{L zLNQA%8cPdRg_rA}uRxp<0`-^=yA<#O=%RH|A5J*|5X>tGsp5tCYLQig0hc(?)6gSb zR>IE@EJ&3W^m02Phc3fSaNFF9+fiVbrPJv%PL2tj-N71_oVnIwzrCh?A&?w%Ubh+v zC&TerqPGU2F!VB8s%b$M9^-7TV~nx-*$L->eTAG$zJVNh8_+IH_n0>`E{FUw5y|Z@ zD$fihp3gnrDV+<@IICG7mG#mkAAy^tR%JE*YF-PwQ=HE8^{yr_lZ{vbmI{T;z{L$h zsv#?oCx0CUx-P%6V_7ZRsNKMPfVG(cTEvAMpYY`5il_4Z6zP@x=H(;xQ)NYix-AnT`=7uA7c%Jfxb6ss?hc zbXf<#4dSiB9j;oqm5DxQnJ@eCZ=|X_#4b-&QF~^uV$yRmeFQP?Cu9VbIrspo9weB4 z7@qym-}H#TLg2Jvjs$A$BWJEQPw zak{voeH9wW;<=R-jTqL-!aT~1_S#b2Spz$|!Y_`O`0hZ%8M`%E^<gmtcb0KH-C+oqY zlj}k1*|1c5OV9n@BR71?`8kSlOFcJb#T(X=;vJ_?G`fo*Ym>}{rKV{Ig^=+L`7zg2m#H)9_vhb(+TB1%Ocs=O z4#V67+yz8{5#VmZC57WdXhlT>Qp3PY1{33E zE~Z>|D5?6)$q{c>JYIzdWwM>tBw*;<1I)BXHC@I&=3n2lC^mfE5hFMj->XHRqC43~ zf#P6oN-cBHUn=YEIqSIQJX?v!A5i|w)|xE%-evmnCMuyxe!p`4fXoeaLLIYvvnJAX zcSl39-D;@!0=mOAxNWIhG6&LPB%+a|m~?qXOHGM5Tvge$74f$L{lL2f)5$~nux7eK ziG-%(L>18kc{G8qRq0F@-PK)wq+qsyri|e1lz6Us$hT=Z0kbVNeinWoasCO&k-O>j zLR%~U5G9bp-vcfJt-wbF(;1~%F6+@0 zhq)F?(sB5T+keErYu6UlWQv__u6w2=EoWWoj3WNm0hx^t!wLa0;2VH^wR{_v3TVJL z0hzRq!IGJK0+tS7kLSk#naz*Fk|`}0#RxVznb`chsQEMS9UuYx1^5=g0UR#qpn3@Q z81Q4@Szwe9uv^Ckd7`)vr9T0l1LX1JI4oqVbuRf4th8KyPr}3su;Ts@*3-aA;28q- zE9<4ZJjeVDRnG&bfE4g^;1|FPfK+lCmV7t82umJ%ehG_)PB;s`3cLbL0IvaZ1iymy zIxq>G2jqL@x3J_)oP%{3_#GgRQon}v8{lQ&An*oZR3V%>V*Q)uXKc3ex%`yt@Fn=o z0zVZWwD*@clW7)QIg-fKby1PnwegGT4!G3=-9RsJ9dJEx1F#(!06qp3MtB$2-vGG? z{s#;BZ@MD7L7!TD6H0CZZUF{?+X$wcYxFHUu%5yb+G^TONcWVu` zqtKb455Z)h)q0EgM53c{>k^91Qu2$1_&$6&YK9Rc`|jn3aap|G(VV{*lHLJyQPP=W zHR8cezi&z1;)Tw3W{M9x&t-pRlv4Ym*$=wDWG}wzSbe^$E~&?kE^$gf4E9YCba-mK1O bi;u=r(yEpymQYz_uTW&x%cJm$Jp6NYO^&p0T2D0d-{TY=aBBpwqfq$&2Z2Hw2HIAe3R=C&nm%D6N$8y&LtAnVn=8= zGQZ@kcVPdo!sOX*xJJ5F^m`RSsjSde@*^Iu>D_rJsWoN0=66`e zVV1-44l{X(v<^GyI=i19D&4Li-8$$$ODJ@doc4;-)eWM)Hp=c4D{EV1X#8d#dT8Q6 z(SugzVZV_Rp+K`3nhPhd*LKI{o^IwOMh=FGPP$7@MBCiKzL~v6cmg&L07+mJz#e%D z&;bM32kaNgmd2gjgq#5^APbBE4+6IgX(^4~4hxx?&a#j`oIAN%E?R@(TEdi%BL$A{ zpq;fGpP5bSj-u;YwficYVAm|(4ez*j4$K-meYgN9RHnPRm zuSN)A7Gf&Nirwt(^?_!1!T8VX+%W722y;jHprZPTfE6DOT{5!mG&4na>T= z$S1AR>a--9*Yz&jB`|2FZMO&U5@oqZJcYipoz9EIYT$dy>!vp-wnd`day97;emHss z<#82ymDPC*U9((7#S@KDHX+VOzA*SzOejXnpT^_tu=q2iljS2wC=8Ujx?bNZUa7B- z%lXe1J(VI^O}c~7e+^U14ij;`emzAHZ#cH`V@8mN8yUfEr3iY(dx0*5u=uWwfBt%R1?X!b%B56BeKV+K8QPW5yQz<%@4E^gg*T$Pcx>V+8$_<}VSXh=a zwRN%;1`fO1%@H!QMPrWFG%UH+xz_lGinUZ)E1x}Vm?z0t`VT2Swnk}ZKKfTEyCkRk z+AgpKCeq1N55mwuna|21Zh9; zG9Yh;iJw9IIWP?z0)7R^9sC&5uK^Qy4Um`7FCb-sUjh#TzX8&~Y2c^8t3U>LoiL6j zc