From fa104fa316a9d061a1be6a9ce8d0de9b27f54da0 Mon Sep 17 00:00:00 2001 From: jaapkruijt <32510680+jaapkruijt@users.noreply.github.com> Date: Fri, 8 Oct 2021 17:07:48 +0200 Subject: [PATCH 1/3] Add capsule functions, integration with emissor --- .../named_entity_linking.cpython-39.pyc | Bin 2051 -> 2117 bytes brain_processing.py | 41 ++++++++ named_entity_linking.py | 16 ++-- simple_ner.py | 90 +++++++++++++++--- 4 files changed, 127 insertions(+), 20 deletions(-) create mode 100644 brain_processing.py diff --git a/__pycache__/named_entity_linking.cpython-39.pyc b/__pycache__/named_entity_linking.cpython-39.pyc index 1f96bc06ae8d8de56e77fb3bd6ec2ace5ca6117a..9941113b0f6271684e16a80eec0e349e5207a149 100644 GIT binary patch delta 341 zcmZvUK~95E5Qg9I9*-v#q*fMQpo%?#F>#|ijT?6)2C=*}M5Oqji6)>6kMJ(g#uJz> zJb{Ohtru~?L^sZ4{>;ob^S^i>o*O%kO)wHWipS#3{i4qtM}~ERRi2~FNJDElC0EA{ ztBnM14RuhHC8^m88ndQLsLd4sxfe(cqynk&52cqRpciTjJwr>$sm!O30d0rTOjgm| zysV;PUYsqmShYMZ3YjOZ_s9B&D&R`=|GA8^w5&pL6PGeea~X<-O8Z@FA*|#>rJvZo zzm4Yv7zj)oHh*-zW-8Jd-%xG< delta 245 zcmX>q&@8~0$ji&c00bP-)``9B8~GkH`>_DI%s}i6#Kl@bqJ}AjF`KDKq=sPuLk;6X z##-h)A)pu&P)rRd#t0FsVGd@{WcDlK2kMAoFG@{L%}cJ-WSX4FBBCu2pPpJ0pPOG? z5)YOr(PSy&0IIvio|hV5l3Gy`#gUX)oSKuFmpXYjiw49=j2w(SK+LuI2a719;4QYq zf`Zh%6irUB&LW}7(roIC!jqlZG#S+==du+u3Qc~WS_i Date: Fri, 15 Oct 2021 17:03:00 +0200 Subject: [PATCH 2/3] Separate update label to brain function --- brain_processing.py | 4 +- .../brain_log_2021-10-15-10-21-29.trig | 11 ++++++ .../brain_log_2021-10-15-10-22-10.trig | 11 ++++++ .../brain_log_2021-10-15-10-22-32.trig | 10 +++++ named_entity_linking.py | 39 +++++++++++++++++-- simple_ner.py | 2 + 6 files changed, 72 insertions(+), 5 deletions(-) create mode 100644 logs/2021-10-15-10-21/brain_log_2021-10-15-10-21-29.trig create mode 100644 logs/2021-10-15-10-22/brain_log_2021-10-15-10-22-10.trig create mode 100644 logs/2021-10-15-10-22/brain_log_2021-10-15-10-22-32.trig diff --git a/brain_processing.py b/brain_processing.py index a80193b..0b1e3a4 100644 --- a/brain_processing.py +++ b/brain_processing.py @@ -1,5 +1,7 @@ from emissor.representation.scenario import Modality, ImageSignal, TextSignal, Mention, Annotation, Scenario from cltl.combot.backend.api.discrete import UtteranceType +from cltl.brain.infrastructure.rdf_builder import RdfBuilder +from rdflib import RDFS from datetime import date from random import getrandbits import requests @@ -12,7 +14,7 @@ def seq_to_text (seq): return text -def scenario_utterance_to_capsule(scenario: Scenario, signal: TextSignal, author:str, perspective:str, subj: str, pred:str, obj:str): +def scenario_utterance_to_capsule(scenario: Scenario, signal: TextSignal, author:str, perspective:dict, subj: str, pred:str, obj:str): place_id = getrandbits(8) location = requests.get("https://ipinfo.io").json() diff --git a/logs/2021-10-15-10-21/brain_log_2021-10-15-10-21-29.trig b/logs/2021-10-15-10-21/brain_log_2021-10-15-10-21-29.trig new file mode 100644 index 0000000..443cd3b --- /dev/null +++ b/logs/2021-10-15-10-21/brain_log_2021-10-15-10-21-29.trig @@ -0,0 +1,11 @@ +@prefix leolaniTalk: . +@prefix leolaniWorld: . +@prefix n2mu: . +@prefix rdfs: . + +leolaniWorld:Instances { + n2mu:jaap_1 rdfs:label "PhD", + "hij", + "jaap" . +} + diff --git a/logs/2021-10-15-10-22/brain_log_2021-10-15-10-22-10.trig b/logs/2021-10-15-10-22/brain_log_2021-10-15-10-22-10.trig new file mode 100644 index 0000000..443cd3b --- /dev/null +++ b/logs/2021-10-15-10-22/brain_log_2021-10-15-10-22-10.trig @@ -0,0 +1,11 @@ +@prefix leolaniTalk: . +@prefix leolaniWorld: . +@prefix n2mu: . +@prefix rdfs: . + +leolaniWorld:Instances { + n2mu:jaap_1 rdfs:label "PhD", + "hij", + "jaap" . +} + diff --git a/logs/2021-10-15-10-22/brain_log_2021-10-15-10-22-32.trig b/logs/2021-10-15-10-22/brain_log_2021-10-15-10-22-32.trig new file mode 100644 index 0000000..25cfe5c --- /dev/null +++ b/logs/2021-10-15-10-22/brain_log_2021-10-15-10-22-32.trig @@ -0,0 +1,10 @@ +@prefix leolaniTalk: . +@prefix leolaniWorld: . +@prefix rdfs: . + +leolaniWorld:Instances { + leolaniWorld:jaap_1 rdfs:label "PhD", + "hij", + "jaap" . +} + diff --git a/named_entity_linking.py b/named_entity_linking.py index 08e416b..cf67895 100644 --- a/named_entity_linking.py +++ b/named_entity_linking.py @@ -18,6 +18,11 @@ from cltl.brain.basic_brain import BasicBrain from cltl.brain.utils.helper_functions import read_query +from cltl.brain.infrastructure.rdf_builder import RdfBuilder +from rdflib import RDFS, Literal + +from tempfile import TemporaryDirectory + class NamedEntityLinker(BasicBrain): @@ -34,10 +39,10 @@ def link_entities(self, ne_list, baseline='popularity'): for ne_text in ne_list: if baseline == 'popularity': uri = self._get_most_popular(ne_text) - uri_list.append(uri) + uri_list.append((uri, ne_text)) elif baseline == 'recency': uri = self._get_most_recent(ne_text) - uri_list.append(uri) + uri_list.append((uri, ne_text)) return uri_list def _get_most_popular(self, ne_text): @@ -51,14 +56,40 @@ def _get_most_popular(self, ne_text): pop_ordered.append((uri, occurrences)) if pop_ordered: uri, popularity = pop_ordered[0] - # else: - # + else: + uri = [] # # TODO add functionality to add entity to graph return uri def _get_most_recent(self, ne_text): pass + def add_labels(self, capsule, uri=None): + ent_uri = self._rdf_builder.create_resource_uri('LW', capsule['subject']['id']) if not uri else uri + for label in capsule['labels']: + self.instance_graph.add((ent_uri, RDFS.label, Literal(label))) + + def add_labels_2(self, identity, labels, uri=None): + ent_uri = self._rdf_builder.create_resource_uri('LW', identity) if not uri else uri + for label in labels: + self.instance_graph.add((ent_uri, RDFS.label, Literal(label))) + + def update_brain(self): + + data = self._serialize(self._brain_log()) + code = self._upload_to_brain(data) + + +if __name__ == "__main__": + import pathlib + + log_path = pathlib.Path('./logs') + print(type(log_path)) + nel = NamedEntityLinker(address="http://localhost:7200/repositories/sandbox", + log_dir=log_path) + nel.add_labels_2('jaap_1', ['jaap', 'PhD', 'hij']) + nel.update_brain() + diff --git a/simple_ner.py b/simple_ner.py index 16192ed..3d9dc9e 100644 --- a/simple_ner.py +++ b/simple_ner.py @@ -17,6 +17,8 @@ import driver_util as d_util import text_to_triple as ttt +from rdflib.namespace import RDFS + utt = "Hi this is Jaap and his father Bart" # Idea: can the system search for NP's in the surroundings of a NE, and remember those From 5725d26f337d0f5e1e62314e69ad0c860ec3b665 Mon Sep 17 00:00:00 2001 From: jaapkruijt <32510680+jaapkruijt@users.noreply.github.com> Date: Wed, 1 Dec 2021 17:00:02 +0100 Subject: [PATCH 3/3] small changes to NER module, make popularity baseline work --- __pycache__/brain_processing.cpython-39.pyc | Bin 0 -> 2535 bytes .../named_entity_linking.cpython-39.pyc | Bin 2117 -> 3405 bytes __pycache__/simple_ner.cpython-39.pyc | Bin 0 -> 3642 bytes brain_processing.py | 48 ++++++++++++++- data/test_scenario/test_scenario.json | 33 +++++++++++ .../brain_log_2021-11-22-17-46-01.trig | 10 ++++ named_entity_linking.py | 17 ++---- recency.rq | 11 ++++ simple_ner.py | 55 +++++------------- 9 files changed, 122 insertions(+), 52 deletions(-) create mode 100644 __pycache__/brain_processing.cpython-39.pyc create mode 100644 __pycache__/simple_ner.cpython-39.pyc create mode 100644 data/test_scenario/test_scenario.json create mode 100644 logs/2021-11-22-17-46/brain_log_2021-11-22-17-46-01.trig create mode 100644 recency.rq diff --git a/__pycache__/brain_processing.cpython-39.pyc b/__pycache__/brain_processing.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e1883c3dbedf0d5c2ca6aa16adc5b7d92b82db09 GIT binary patch literal 2535 zcmaJ@%Z?jG6z!M0+mG=)lT0E=20|bPVn+z^QpgbU1~QAxWYY?=+FdoCcKYE|m!l+j zR>__p@M31iY!C~+fNx+`5{rPuC=!Sj3nGL#x9pjWP()jG>ej1$>()89I-HuS75G{CPr!oW@-giY6o`e1WsBCN~s&TCXHK(m->OPZ96Ha zm7t<+C#j~jpr-9oGL_bYy0+b9I-Ln-wCyFc!7TGx`LPkqahKbCo-d%_$0{R_Raxz^ z6CA<__0g!0@dY+D@>!itW9%?y&Wubp%jVEJ!n{?ZG5-{v8x2?aH*yvxv3#Ja*VC}e zSL1FLCaSW=@5_np-Qbyw^GsDPWLYjlZTqVc&q5LB4O30sl9G!si}>1uK1WC8CR_h> z5GRZaw49rluB=kcLP?|5E|-|iI3UsAH9eXIEOc3NDIPFAV#4#31g;)}=feq}WW+ zx*E%CgHCJZ@)u{KT<~=+INWb_L>Oo7zQ`k96miyV_7NAnzb*4N{R7dW@BxPDn7&yt z#XRhWr~G0P4&@f(2)f}wZsbB$`*5PqBN^Z0$|(k&o^twvGiB$UUgXicCJ(**2#VhQ zcQ7RO|H$0*q`74=Yuntiw~bAIX!M++*>jjJOQ_vPru2F~b7Wao#_dPu5o74E5_3lu z^M<9^9J*i-T0+Ny6GL$K@z; z@7T)8!W4|fOU0Y?=6U$B5yBfe5Schk4weUdR9E$VvrQXNem^f_tqqS(nYK};gN`H6 zUc5qYrPEd9C~>JBGf}0EQa=gdw681bJxOz4BY2g@y>US>KysCTEW7rbdstk@x`sb?mfR6n37)(C9yN8yT`6J&F4D?H zFFm9-ih)+!H!vhL1GSO9%%P3qNa&?!KQf0UW=aQY(=Dwok6BQOP3qB>gH{P@bI^9j zBT${ky!}~ZO=b;U{D<4~q`y^$3N7a2J;-iajh5EpxNlF9HLRW5S6O{Uyaa#62LvYw zmI#g$d`O_VM$|dLq>rh2p5ProqoNkOJVVl!yq$Lv6e4W+T@fV+ArYhYFlK~gkXz6~I9<${13s`~+qCGw~O1=}qw6M5%8({cElQCT^&}GrA zqktyXLAt>H9Ync-`M7sV8-o;=)WHkK!ibBaNkDvICbO8$93(!Z&xz$BdFs9{(kCm! zuIR*99a*@Z=rXFUh*MZCUMF~i;55No1mvzbL(n9kEr_##h9#aK3IzlmE2x4hokANz z#Rc@ANQuue;U-E!3k+*t4}ZI^9&0SB6FiNJA{R};k$wtXcUq#&y%-DHw6F;rf)W7* zLC|HSmZC%^%_vVhxomdA=nl_VGwjDr78jA=NM)oVbxKdu_hl0~UWD*>5Ft_WreN#s z&iM8h^reLK&DN8+BS><}r>_7RBhl*Q=hBxzp6ZMymN8+Ryy!si!4OAM%UmC!-r#|N nPzc8S@z13>L-sjOXIoI_Trz9>Ki~GfW5+G9?f36Yo19ny)7dpR$SN$CtPGZ6>PIGHUV``yn=l0Br9V0urJM&`C$Zp=5 z`LSRhX!-NM<6~j>6y8LLP== znPf0mhLfz|;lumCep`j#WYyHDh3|->%BoB#hGQ&G#v7UuSF{Oa9b zwW26rYVOJsoHj{}(`xM3xT5i4rV@Mq%%EqO$Zhh6H+powC^bTFBX#VphsS zoR=cu>=6bId-wU|cAe#1$cSp^{+)XdbY~|6AS8K;`>#IEvZKBRL6F!2z*D~Q9hm^= zx&62s+x#MTx%b$L9p2&oV-mZ(%LDlD@k_kN`!Lhtm-&ES0LkY=z7BKU?}$U<03f-k z*LMM0{4PS~a0h}aaU1 zTk}KNgX_GG22f}SU0i#?=&iH6>R?{Z>pYR*j}(nQdpr=(5x;`h1Z0mK*^PCF{Ndh_ z(@<^!Q*1Tlgfz~nb>ueoiPgA`Goh!z5s!S(prGM{2K^rzHUz}pm6zavxFbahCLMxH z*V&$^*sN3)TV73j$$Tyfu7~GSM!$+Y**D7z0rsmZ_7SV#2@(6X%*>eXz$6ljwqZza zfPULt1WEVNZLJG|Lf(KOg&M?iEFXS>HC(ofH(qq&Giy`hY+MTuviei+{Rui1u=TOk zSexXCf=>WS;58e3>NGa^bo3Xn5RbsvgfE9Sjv{cMjBF$xYPkG07)v+|q)+m;z%| zfdms-X_5&JN18ec47h`@7!KG`Si92Edy~ zA{xvUB6@Du^LKg;lzt9xo3z&Atyi7h!hiRfj77J1;5BD|0YXF7H9!KOjRW$|P<2WR z+EC18#1>%hI$)6OQ$UwpIs0zqHO{d$u}vQT1n(cYyYdF?r#leReJI$PW?Xxw(yAvN z8N!@=6PkFC6{<>rn%JI9=2LAW0pgvsYqX^ga^xB(=uVo3B?Vdy2s%JHIB|HZg0h+62N`+<28$Y&%7a zojo=P2sN|Ebr1|IL)1?Ra10bLEL|Ycr&t7*4H#6LEXlKlhy!N61{f5nc*yE`UP1-0 z%62re+h{pFsS@p^C8XVR9NMerFezGn1fDV#l|ll&#Ewx+ak&W{!Lhn-Ml2K%@K1+M0?yXKdxhLsUWPH$@wX zLk*zu>A(rr{B_?B+`xTW&}ZGqPg`rP%&zdMCE0*?x3IcNpVk%2+&1^D5sm zvDm7sELRfgB;H&ZhVquBt)nHzJ$`*t!$q#X^Gf%21Ols#V5VVh< zJ~$+?xN4_5oC|FiB0mq-N^}geOkwsvX@mbMLS2Q>I74J-_)*|!I{XOgGq7|I*qZ*w HqeJVzq#1YL delta 828 zcmZuvy>1gh5T5?#%2rGvDslwa=ACCyD~Vp@Ot?M*>{9 z!{Xq0cz}{`zi%{FTcCvUrl7oxpRmMN{!@tMr*9Xe9O$g0(&0GMZ|o1c*4)BN86-qO zLRI+`4vC_QO-l(=VF5xCu6!k@kO);$0j}ODa7Y|vWQVN&*h2{Ba$iF=f)O!rLSIrt z3^hz4-dj#U0mifTUQCj(m#6vo`Kx0!?q@NZiG2Jz(^}Xs?49rth}gmX*L-3M!hV*fu_8Q3J50>XF$TawjagjQVAUbr6}~keiyeMZeTq^ z(HbaLgNcWG{%h|Wjx~A4Kcq#i*A3T+;Y!qCpH94qG#*yx*}j+Fj+u4@BvC>H5o%8RebOmhliA)=&f+T|kgW-0Q9BLlx zo~A?+vp@g}kbH3B+!J7V`RrRR%2&PI6)DN5WXY+iuCD2Nc2&iblXVNf ze>(q&|J<^yf79alFTmm!p7IW}EN*d@S|Ppdlx22kGs2zJ$=uL2xSM*JANmIO(pnaT zfx-QBBCCgWgV)kV)(o53WH_0%!j_>0=~OlyP8)n8oylgy*=#PH%jU!R>{NIvI~|_R z&V*;oK1(r>hFwC^O+}Z_$Hs_b5E@B$9$fj!uzWD2|xYN{+7vG!Wz8A&m6F@Eo^?4pZkXW z^=f!cTotakw$H?seLH+xyd&P;XUzH?{Nq}#weK)1pMBT*mX$wSwwU?*10ZLsR(s() z^6yHU>DdP&>lHGVyLS>Px~h=7pt@hgY7_67l{gdpZmtrwdq2q^3JKzcvMcgfCPlQZ zl#p@W6_F~UZrm%kQvrsl)gy*g92akhwxe6O^Sn^8N{al!Zck|MLGe)J+F7~#B|h*z z&AHfV`#Sin;Bg9Jy76g-FISU|JWh4}f!I+;cgJJ#W(CHU!l|UWl1a}f?tXdaSF3wh?j?(AGbtDG|LZvJEyg)tr1d(S zBIM#PW2p|A_DiE&7dbipO#t)pIC#o`0dV#;yLKd>ATzm&8qAzm#BS@`^VQ`TTd-~S2N!YIsB&#l;9YpW+)Se;+azd z$oB?5uhUt|shp~|Z=@a|V~+M3M|*x19NV*yEzK3}mA&}mU2UDP)`o#}H|NOO_W(Am z%KFnaSZO`8wphisVC@!cC7aJ6Bb7VwK=BXQy8So%D|gj8wAwXEw_n={*G|HBw3CWl z(y_X}UdX6Kbn*@DQm_tfZ7(frPf_y7)8LT{0ROqw`E4m=+1Wz09?I=xOLguP-R%sS zQg%K`)V=Ms&dS~U*Sm!j>p}`-Y^O{z1U<@yTqu7e!h1# z=A4+Kc$~mXH`1aTr{zr`qmN$ki-!YN>7=s&#xK+4mx1xh0UKIxv~N3-(nh`kp#3yn z6KQm4%S*I$V1Z#8f-A)LCcyi6zQ^-wJV55i9}0SAJ+h<=ktd9_wf87aw{i0&iiDJl_zHKhL6=$W zM&!tsZ2T!WUN4&5Wkg>CVnkmi-zwzVmNRew3^ki{lr{bGH+6iU}$+?Uto)eVw9yk!3K6Mp;O^po%7;E^u1 zQfyESkUt0Ubz0m!u0^}1-K`=)Q!2$*+R4+NcJ`8Wk`NsnsZ#333&)gt>SSdpjgnCR9a5Mos*z#RP0zksi75X| z;uOG@E9#b{)7sgiezM&uN#Sya)a+`F`}HbfERec*@HF!#Y)22X@~@ z$8xC;Rczo?F7=DbLr-8UZCB2MSI>5(K*xcl))<9@MHD z`g>3X@J(%H@7ktPy^lH_(RPbyKW=!MH}e{y%_fp%#uX$O*mHVnelQ4mI)at2#S;^MJxWJy;R zrRWwpH(t?`DMq=-R9@)Op3nh_?P0nZhRDf9swYQBnGM|-FLC9k$BS|};c1sB@^caz zj1GP|Zf=Hf-v>)M4?rnP3F=bnvh%FvA!xBVAc}}HigZil5%uiou~>`BY;VG3%ynps z*5UWi%#)q=bu*lFcXgmJ%C0BqE);fMBZJapO;SbC^HjX~&}eEm`9n+<#({d;oI1)S zB2)9$b0SNq>@W2&TA)4A)F3H=^rg`b!iliKUNhJ(y|~$?y_g}!ly1HF179E}3Z|kT zni=Tg53r5gEgKLon*=5aP$?d^vuVOi$24uhM1Q7~-KFkktkw!uF3|{Vaz#?>)bl4c z`F55e6(j{lJ4z`%E%`c}w?uBk><80NnLXS+bGuC@jpIdJh?kodF_%=^WoWqJ9qsin zz#YHD<4&c68(G0I4%}qmE~&o-$OZF CtdTMR literal 0 HcmV?d00001 diff --git a/brain_processing.py b/brain_processing.py index 0b1e3a4..73f3698 100644 --- a/brain_processing.py +++ b/brain_processing.py @@ -40,4 +40,50 @@ def scenario_utterance_to_capsule(scenario: Scenario, signal: TextSignal, author {'type': 'pillbox', 'confidence': 0.32, 'id': 1}], "people": [{'name': 'Carl', 'confidence': 0.98, 'id': 1}] } - return capsule \ No newline at end of file + return capsule + + +# create a capsule for a TextSignal with a triple and perspective string +def scenario_utterance_and_triple_to_capsule(scenario: Scenario, + place_id: str, + location: str, + signal: TextSignal, + author: str, + utterance_type: UtteranceType, + perspective: dict, + triple: dict): + value = generate_obl_object_json(author) + capsule = {"chat": scenario.id, + "turn": signal.id, + "author": author, + "utterance": seq_to_text(signal.seq), + "utterance_type": utterance_type, + "position": "0-" + str(len(signal.seq)), # TODO generate the true offset range + "subject": {'label': triple['subject']['label'], 'type': triple['subject']['type']}, + "predicate": {'type': triple['predicate']['label']}, + "object": {'label': triple['object']['label'], 'type': triple['object']['type']}, + "perspective": perspective, + "context_id": scenario.scenario.context, + ##### standard elements + "date": date.today(), + "place": location['city'], + "place_id": place_id, + "country": location['country'], + "region": location['region'], + "city": location['city'], + "objects": value['objects'], + "people": value['people'] + } + + return capsule + + +# Function to generate bogus elements for capsules. Without these, the update function fails +def generate_obl_object_json(human: str): + json_string = { + "objects": [{'type': 'chair', 'confidence': 0.59, 'id': 1}, + {'type': 'table', 'confidence': 0.73, 'id': 1}, + {'type': 'pillbox', 'confidence': 0.32, 'id': 1}], + "people": [{'name': human, 'confidence': 0.98, 'id': 1}] + } + return json_string \ No newline at end of file diff --git a/data/test_scenario/test_scenario.json b/data/test_scenario/test_scenario.json new file mode 100644 index 0000000..2ee9be2 --- /dev/null +++ b/data/test_scenario/test_scenario.json @@ -0,0 +1,33 @@ +{ + "context": "AGENT", + "id": "test_scenario", + "signals": { + "image": "./image.json", + "text": "./text.json" + }, + "@context": { + "Scenario": "https://emissor.org#Scenario", + "id": "@id", + "context": "https://emissor.org#context", + "signals": "https://emissor.org#signals", + "ruler": "https://emissor.org#ruler" + }, + "@type": "Scenario", + "ruler": { + "container_id": "test_scenario", + "start": 662994, + "end": 662999, + "@context": { + "TemporalRuler": "https://emissor.org#TemporalRuler", + "id": "@id", + "start": "https://emissor.org#start", + "end": "https://emissor.org#end", + "container_id": { + "@id": "https://emissor.org#container_id", + "@type": "@id" + } + }, + "@type": "TemporalRuler", + "_py_type": "emissor.representation.container-TemporalRuler" + } +} \ No newline at end of file diff --git a/logs/2021-11-22-17-46/brain_log_2021-11-22-17-46-01.trig b/logs/2021-11-22-17-46/brain_log_2021-11-22-17-46-01.trig new file mode 100644 index 0000000..25cfe5c --- /dev/null +++ b/logs/2021-11-22-17-46/brain_log_2021-11-22-17-46-01.trig @@ -0,0 +1,10 @@ +@prefix leolaniTalk: . +@prefix leolaniWorld: . +@prefix rdfs: . + +leolaniWorld:Instances { + leolaniWorld:jaap_1 rdfs:label "PhD", + "hij", + "jaap" . +} + diff --git a/named_entity_linking.py b/named_entity_linking.py index cf67895..e995449 100644 --- a/named_entity_linking.py +++ b/named_entity_linking.py @@ -16,23 +16,17 @@ from cltl.brain.utils import base_cases from cltl.brain.basic_brain import BasicBrain +from cltl.brain.long_term_memory import LongTermMemory from cltl.brain.utils.helper_functions import read_query from cltl.brain.infrastructure.rdf_builder import RdfBuilder from rdflib import RDFS, Literal -from tempfile import TemporaryDirectory - - -class NamedEntityLinker(BasicBrain): +class NamedEntityLinker(LongTermMemory): def __init__(self, address, log_dir, clear_all=False): - super(NamedEntityLinker, self).__init__(address, log_dir, clear_all, is_submodule=True) - - # Problem: How are uri's defined right now in the brain? Is ambiguity taken into account? --> - # Otherwise uri's are the same - # E.g. if labels are firstname-lastname then query needs to be RE only looking at part before hyphen + super(NamedEntityLinker, self).__init__(address, log_dir, clear_all) def link_entities(self, ne_list, baseline='popularity'): uri_list = [] @@ -51,14 +45,15 @@ def _get_most_popular(self, ne_text): # print(response) pop_ordered = [] for row in response: + print(row) uri = row['ent']['value'] occurrences = row['num_mentions']['value'] pop_ordered.append((uri, occurrences)) if pop_ordered: uri, popularity = pop_ordered[0] else: - uri = [] - # # TODO add functionality to add entity to graph + uri_name = f'{ne_text}_1' + uri = self._rdf_builder.create_resource_uri('LW', uri_name) return uri def _get_most_recent(self, ne_text): diff --git a/recency.rq b/recency.rq new file mode 100644 index 0000000..fc3fd5b --- /dev/null +++ b/recency.rq @@ -0,0 +1,11 @@ +prefix gaf: +PREFIX rdfs: + +select ?ent (COUNT(DISTINCT ?e) as ?num_mentions) where{ + ?ent rdfs:label "%s". + + ?ent gaf:denotedIn ?e. + } + +group by ?ent + order by DESC(COUNT(DISTINCT ?e)) \ No newline at end of file diff --git a/simple_ner.py b/simple_ner.py index 3d9dc9e..f1695e8 100644 --- a/simple_ner.py +++ b/simple_ner.py @@ -11,37 +11,21 @@ from emissor.representation.scenario import Modality, ImageSignal, TextSignal, Mention, Annotation, Scenario import uuid import time +from datetime import datetime # These modules are not included in NEL-coreference at the moment!! Won't work outside this machine -import capsule_utils -import driver_util as d_util -import text_to_triple as ttt +from src.chatbots.util import driver_util, capsule_util +from src.chatbots.dummies import text_to_triple as ttt from rdflib.namespace import RDFS -utt = "Hi this is Jaap and his father Bart" +utt = "Carl likes Bart" # Idea: can the system search for NP's in the surroundings of a NE, and remember those # TODO Testing linking separate from NER (by e.g. using hashes and a dict) (NamedEntityRecognizer) -# Make linking independent from ner function (so not nested inside the ner function but use its output), CHECK! -# Updating the brain: a lot of it is already done automatically in the LTM update() function -# If I do it here as well then it is done twice; what is the right way to approach this? # Using dummy triples that don't require an utterance? - -# def named_entity_recognition(utterance, nel: NamedEntityLinker): -# # processor_name = "spaCy" -# -# doc = nlp(utterance) -# -# # tokens = [token.text for token in doc] -# -# # entity_label = [ent.label_ for ent in doc.ents] -# entity_text = [ent.text.lower() for ent in doc.ents] -# -# return entity_text - def add_ner_annotation(signal: TextSignal): processor_name = "spaCy" utterance = ''.join(signal.seq) @@ -68,35 +52,26 @@ def add_ner_annotation(signal: TextSignal): def utterance_processor(utterance, scenario, brain, author): - text_signal = d_util.create_text_signal(scenario, utterance) - # @TODO - ### Apply some processing to the text_signal and add annotations + text_signal = driver_util.create_text_signal(scenario, utterance) + entity_text = add_ner_annotation(text_signal) scenario.append_signal(text_signal) - ## Post triples to the brain: - - subj, pred, obj = ttt.getTriplesFromEntities(entity_text, text_signal.id) - response = {} - if not subj == "": - print('Subject:', subj, 'Predicate:', pred, 'Object:', obj) - perspective = {"certainty": 1, "polarity": 1, "sentiment": 1} + return entity_text - capsule = scenario_utterance_to_capsule(scenario, text_signal, author, perspective, subj, pred, obj) - # perspective is a dict instead of a str? - # print('Capsule:', capsule) - response = brain.update(capsule, reason_types=True) - # print(thoughts) - - -def main(log_path): +def main(log_path, utterance): nel = NamedEntityLinker(address="http://localhost:7200/repositories/sandbox", log_dir=log_path) - entity_text = add_ner_annotation(utt) + scenario_path = './data' + scenario_id = 'test_scenario' + scenario_storage = driver_util.create_scenario(scenario_path, scenario_id) + scen = scenario_storage.create_scenario(scenario_id, datetime.now().microsecond, datetime.now().microsecond, 'AGENT') + entity_text = utterance_processor(utterance, scen, nel, 'Jaap') # link_entities expects a list with all entities in one # but the new ner gives a list with a single entity per utterance(?) + entities = nel.link_entities(entity_text) return entities @@ -105,5 +80,5 @@ def main(log_path): if __name__ == "__main__": nlp = spacy.load('en_core_web_sm') with TemporaryDirectory(prefix="brain-log") as log_path: - res = main(Path(log_path)) + res = main(Path(log_path), utt) print(res) \ No newline at end of file