diff --git a/_images/0f9be4bc798826a7715b548421e591c4a6528547a97c8350c9f8113676b8cd59.png b/_images/0f9be4bc798826a7715b548421e591c4a6528547a97c8350c9f8113676b8cd59.png deleted file mode 100644 index ec303652..00000000 Binary files a/_images/0f9be4bc798826a7715b548421e591c4a6528547a97c8350c9f8113676b8cd59.png and /dev/null differ diff --git a/_images/102e78040ff456694f0069c23d106300b6047f1c7b9b0a212eb5aecf969dd07b.png b/_images/102e78040ff456694f0069c23d106300b6047f1c7b9b0a212eb5aecf969dd07b.png new file mode 100644 index 00000000..9cc81c1e Binary files /dev/null and b/_images/102e78040ff456694f0069c23d106300b6047f1c7b9b0a212eb5aecf969dd07b.png differ diff --git a/_images/38d1469220ed84d85d484b6f8f1a356337fa74308cc504285292a59f305b2db8.png b/_images/38d1469220ed84d85d484b6f8f1a356337fa74308cc504285292a59f305b2db8.png deleted file mode 100644 index a71465f7..00000000 Binary files a/_images/38d1469220ed84d85d484b6f8f1a356337fa74308cc504285292a59f305b2db8.png and /dev/null differ diff --git a/_images/43c8357fc985747deaaaa078aee54fc4146152e054fbd8b7265f9fed7be2a9da.png b/_images/43c8357fc985747deaaaa078aee54fc4146152e054fbd8b7265f9fed7be2a9da.png new file mode 100644 index 00000000..6ea6ba9c Binary files /dev/null and b/_images/43c8357fc985747deaaaa078aee54fc4146152e054fbd8b7265f9fed7be2a9da.png differ diff --git a/_images/4998ba1fe3108b7cbf7f1a637ed0a893985c53b5fe12dbc7b65acdfb567f9b80.png b/_images/4998ba1fe3108b7cbf7f1a637ed0a893985c53b5fe12dbc7b65acdfb567f9b80.png new file mode 100644 index 00000000..3f18d077 Binary files /dev/null and b/_images/4998ba1fe3108b7cbf7f1a637ed0a893985c53b5fe12dbc7b65acdfb567f9b80.png differ diff --git a/_images/80d8f5ea4364e618ca9620a3b1cbb525c23ad4a3bd80652d02453eca44b61fe3.png b/_images/80d8f5ea4364e618ca9620a3b1cbb525c23ad4a3bd80652d02453eca44b61fe3.png deleted file mode 100644 index 363a2dcc..00000000 Binary files a/_images/80d8f5ea4364e618ca9620a3b1cbb525c23ad4a3bd80652d02453eca44b61fe3.png and /dev/null differ diff --git a/_modules/dacy/score/score.html b/_modules/dacy/score/score.html index 150a13a7..78f34a97 100644 --- a/_modules/dacy/score/score.html +++ b/_modules/dacy/score/score.html @@ -403,7 +403,7 @@

Source code for dacy.score.score

             for fn in score_fn:
                 if isinstance(fn, str):
                     fn = def_scorers[fn]  # noqa
-                scores.update(fn(examples))
+                scores.update(fn(examples))  # type: ignore
             scores = flatten_dict(scores)
             scores_ls.append(scores)
 
diff --git a/searchindex.js b/searchindex.js
index 8605d173..710c7e19 100644
--- a/searchindex.js
+++ b/searchindex.js
@@ -1 +1 @@
-Search.setIndex({"docnames": ["adv_tutorials", "api/dacy.datasets", "api/dacy.download", "api/dacy.score", "faq", "index", "installation", "news", "performance", "performance.general", "performance.robustness", "performance_ner", "tutorials", "tutorials/basic", "tutorials/hate-speech", "tutorials/robustness", "tutorials/sentiment", "tutorials/textdescriptives"], "filenames": ["adv_tutorials.md", "api/dacy.datasets.rst", "api/dacy.download.rst", "api/dacy.score.rst", "faq.md", "index.md", "installation.md", "news.rst", "performance.rst", "performance.general.rst", "performance.robustness.rst", "performance_ner.ipynb", "tutorials.md", "tutorials/basic.ipynb", "tutorials/hate-speech.ipynb", "tutorials/robustness.ipynb", "tutorials/sentiment.ipynb", "tutorials/textdescriptives.ipynb"], "titles": ["Advanced Tutorials", "Datasets", "Downloading and Loading", "Score", "FAQ", "DaCy", "Installation", "News and Updates", "Performance", "State of the Art Comparison", "Robustness and Biases", "Named Entity Recognition", "Using DaCy", "Getting started", "Hate Speech", "Evaluating Robustness", "Sentiment Analysis", "Extracting Metrics from text using TextDescriptives"], "terms": {"daci": [0, 4, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16], "includ": [0, 1, 2, 3, 7, 9, 10, 11, 12, 13, 14, 15, 16], "seri": [0, 4, 7, 10, 11, 12, 16], "slightli": [0, 11, 15], "more": [0, 7, 10, 11, 12, 13, 15, 17], "These": [0, 2, 10, 11, 12, 14, 16], "ar": [0, 1, 2, 3, 7, 9, 10, 12, 13, 16, 17], "meant": [0, 11], "show": [0, 9, 10, 11, 13, 14, 15, 16, 17], "how": [0, 5, 10, 11, 12, 15, 16, 17], "us": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 13, 14, 15, 16], "task": [0, 7, 9, 10, 11, 13], "evalu": [0, 9, 10, 11], "integr": 0, "other": [0, 7, 9, 10, 11, 12, 13, 15, 17], "librari": [0, 4, 15, 16], "robust": [0, 5, 7, 8, 9], "instal": [0, 2, 4, 5, 7, 13, 16], "packag": [0, 2, 4, 5, 7, 13, 16, 17], "load": [0, 1, 3, 7, 13, 16, 17], "model": [0, 2, 3, 7, 9, 10, 12, 13, 17], "data": [0, 4, 7, 10, 11, 13], "estim": [0, 10, 11, 16], "perform": [0, 3, 5, 7, 10, 13, 14, 16, 17], "bias": [0, 5, 7, 8, 9, 16], "extract": [0, 3, 7, 11, 13], "metric": [0, 3, 7, 8, 11, 15], "from": [0, 1, 3, 4, 6, 7, 9, 11, 13, 15, 16], "text": [0, 2, 7, 10, 11, 13, 14, 16], "textdescript": [0, 7], "ad": [0, 7, 11, 15], "compon": [0, 3, 7, 13, 16], "exploratori": 0, "analysi": [0, 7, 10, 12, 14], "thi": [1, 3, 5, 7, 9, 10, 11, 13, 14, 15, 16, 17], "wrap": [1, 7, 12], "read": [1, 5, 9, 10, 11], "spaci": [1, 2, 3, 5, 7, 9, 11, 12, 13, 14, 15, 16, 17], "corpu": [1, 3, 11], "save_path": 1, "none": [1, 3, 13], "split": [1, 3, 13, 15, 16], "train": [1, 3, 7, 10, 11, 12, 13, 14, 15, 16, 17], "dev": [1, 3], "test": [1, 3, 5, 7, 9, 10, 11, 13, 15, 17], "redownload": [1, 2], "fals": [1, 2, 13, 15, 16, 17], "n_sent": [1, 3], "1": [1, 2, 3, 7, 11, 13, 15, 16, 17], "open_unverified_connect": 1, "kwarg": [1, 2, 3], "sourc": [1, 2, 3, 7], "paramet": [1, 2, 3], "str": [1, 2, 3, 11], "option": [1, 2, 3, 9], "path": [1, 2], "If": [1, 2, 3, 4, 5, 9, 11, 12, 13, 16, 17], "doe": [1, 9, 11, 14, 16], "contain": [1, 3, 5, 8, 10, 11, 14, 16, 17], "i": [1, 2, 3, 5, 7, 8, 10, 11, 13, 14, 15, 16, 17], "download": [1, 7, 13, 15, 17], "folder": [1, 4], "default": [1, 2, 3, 7, 13, 15], "correspond": 1, "where_is_my_daci": [1, 2], "subfold": 1, "list": [1, 2, 3, 13, 15], "which": [1, 3, 4, 7, 9, 10, 11, 13, 15, 16], "should": [1, 2, 3, 7, 13], "return": [1, 2, 3, 15], "possibl": [1, 3, 16], "all": [1, 3, 4, 7, 9, 10, 11, 13, 17], "bool": [1, 2, 3], "int": [1, 3], "number": [1, 3, 11, 13], "sentenc": [1, 3, 12, 16, 17], "per": [1, 11, 13], "document": [1, 5, 7, 11, 13, 16, 17], "onli": [1, 7, 9, 10, 11, 13, 16, 17], "appli": [1, 3, 7, 9, 10, 11, 13, 14, 15, 16], "you": [1, 2, 3, 4, 5, 6, 7, 9, 12, 13, 14, 15, 16, 17], "an": [1, 3, 5, 7, 11, 13, 14, 15, 16], "unverifi": 1, "connect": 1, "force_extens": 1, "set": [1, 9, 10, 11, 14, 15, 16, 17], "extens": [1, 4, 7, 11, 14, 16], "doc": [1, 3, 4, 11, 13, 14, 16, 17], "regardless": [1, 2, 15], "whether": [1, 7, 14, 15, 16], "alreadi": [1, 2, 11, 13, 15, 17], "exist": [1, 7, 11, 13, 14, 16], "thereof": [1, 3], "type": [1, 2, 3, 11, 13, 14, 15, 16], "union": [1, 3], "exampl": [1, 2, 3, 13, 14, 15, 16], "import": [1, 2, 3, 9, 11, 13, 14, 15, 16, 17], "helper": 1, "function": [1, 2, 3, 7, 10, 13, 15], "dictionari": [1, 3, 12], "person": [1, 7, 10, 11, 13, 14], "augment": [1, 3, 4, 7, 15], "danish_nam": 1, "danish": [1, 3, 4, 5, 7, 9, 10, 11, 13, 14, 15, 16, 17], "A": [1, 2, 3, 4, 7, 9, 10, 11, 13, 14], "kei": [1, 15, 17], "first_nam": 1, "last_nam": 1, "The": [1, 2, 3, 4, 7, 9, 10, 11, 13, 14, 16, 17], "deriv": [1, 11, 13], "danmark": [1, 13], "statistik": 1, "2021": [1, 4, 10, 11], "dict": [1, 3], "female_nam": [1, 15], "femal": [1, 15], "load_nam": 1, "min_count": 1, "0": [1, 2, 3, 7, 9, 11, 13, 15, 16, 17], "ethnic": 1, "gender": 1, "min_prop_gend": 1, "lookup": 1, "tabl": [1, 9, 10, 11], "muslim": [1, 11], "meldgaard": 1, "2005": 1, "http": [1, 4, 6, 7, 13], "nor": 1, "ku": 1, "dk": 1, "publikation": 1, "webpublikation": 1, "muslimske_fornavn": 1, "minimum": 1, "occur": 1, "indic": [1, 2, 3, 9, 13], "male": [1, 11, 13], "float": [1, 15], "probabl": [1, 11, 13, 16], "being": [1, 2, 7, 16], "given": [1, 3, 9, 11, 13], "specif": [1, 4, 7, 9, 11], "base": [1, 7, 10, 11, 12, 13, 14], "proport": 1, "peopl": [1, 11, 13], "when": [1, 7, 9, 10, 11, 16], "male_nam": 1, "muslim_nam": 1, "method": [2, 11, 16], "them": [2, 13], "memori": [2, 13], "download_model": 2, "forc": 2, "specifi": [2, 3], "pipelin": [2, 3, 5, 7, 9, 10, 11, 13, 14, 15, 16, 17], "string": [2, 3], "get": [2, 5, 9, 12, 15, 16, 17], "present": 2, "locat": [2, 7, 11, 13], "da_dacy_medium_trf": [2, 11, 13], "get_latest_vers": 2, "latest": [2, 7, 13, 17], "version": [2, 3, 7, 11, 13, 17], "valid": [2, 3, 13, 17], "process": [2, 3, 4, 7, 9, 10, 11], "also": [2, 4, 5, 7, 9, 10, 11, 13, 14, 15, 16], "wish": [2, 3, 11, 15, 16], "To": [2, 4, 7, 11, 13, 14, 15, 16, 17], "see": [2, 10, 11, 13, 15, 16, 17], "avail": [2, 4, 7, 11, 13, 17], "even": [2, 10, 13], "ani": [2, 7, 11, 13, 15, 16, 17], "addit": 2, "argument": [2, 3, 15], "pass": [2, 3, 13, 17], "preprocess": [2, 5], "languag": [2, 3, 4, 5, 7, 9, 10, 11, 13, 16], "equival": [2, 16], "medium": [2, 7, 9, 11, 13], "verbos": [2, 3], "true": [2, 3, 11, 13, 15, 16, 17], "where": [2, 7, 9, 11, 13, 16, 17], "can": [2, 3, 4, 6, 7, 9, 10, 11, 13, 15, 16, 17], "configur": [2, 7], "environment": [2, 7], "variabl": [2, 7], "dacy_cache_dir": [2, 7], "toggl": [2, 3], "beta": [3, 7], "no_misc_gett": 3, "attr": 3, "util": [3, 4, 9, 11, 13, 17], "getter": 3, "entiti": [3, 5, 7, 8, 12, 15], "without": [3, 13], "misc": [3, 11, 13], "attribut": 3, "iter": 3, "span": [3, 13, 15, 16], "apply_fn": [3, 15], "score_fn": [3, 15], "token": [3, 9, 10, 11, 13, 15, 16], "po": [3, 9, 13, 15, 16], "ent": [3, 13, 15], "dep": 3, "k": [3, 4, 15], "nlp": [3, 4, 5, 7, 9, 10, 11, 13, 14, 15, 16, 17], "potenti": 3, "callabl": 3, "wrapper": [3, 15], "take": [3, 11, 16, 17], "output": [3, 14, 15, 16], "tag": [3, 5, 7, 8, 9, 11, 12, 15], "provid": [3, 15], "four": [3, 11], "potienti": 3, "measur": [3, 13, 17], "fine": [3, 7, 8, 12], "grain": [3, 7, 8, 12], "tag_acc": [3, 15], "coars": 3, "pos_acc": [3, 15], "depend": [3, 4, 5, 7, 8, 9, 12, 15, 16, 17], "pars": [3, 5, 7, 8, 9, 12, 15, 16], "style": [3, 4, 11, 13, 16], "time": [3, 10, 11, 13, 15], "run": [3, 4, 9, 11, 13, 14, 15, 16, 17], "empti": [3, 9, 13, 14, 16], "call": [3, 15], "panda": [3, 15], "datafram": [3, 15, 17], "create_lower_casing_augment": [3, 15], "dataset": [3, 11, 15, 17], "dane": [3, 7, 8, 9, 10, 15], "da_dacy_small_tft": 3, "5": [3, 7, 11, 13, 15, 16, 17], "vari": 3, "input": [3, 9, 10], "length": 3, "n_sents_scor": 3, "form": [3, 13], "score_nam": 3, "defualt": 3, "come": [4, 13, 14, 16, 17], "suit": [4, 7], "implement": [4, 9, 16], "pytest": 4, "In": [4, 9, 10, 11, 13, 15, 17], "order": 4, "have": [4, 5, 7, 10, 11, 13, 14, 17], "clone": 4, "repositori": [4, 5], "requir": [4, 7, 10, 11, 13, 15], "defin": [4, 14, 15, 16], "extras_requir": 4, "section": [4, 9, 10], "pyproject": 4, "toml": 4, "git": [4, 6], "github": [4, 6, 7, 11, 13, 15], "com": [4, 6, 7], "centr": [4, 6, 7], "human": [4, 6, 7, 10, 16], "comput": [4, 6, 7, 11, 13, 17], "pip": [4, 6, 7, 15, 16], "e": [4, 7, 11, 13, 16], "python": [4, 13, 15, 17], "m": [4, 13, 15, 17], "desired_test": 4, "py": [4, 13, 15, 16, 17], "want": [4, 10, 11, 13, 14, 15, 16, 17], "check": [4, 7, 9, 13, 16], "coverag": 4, "follow": [4, 7, 9, 10, 11, 13, 16, 17], "cov": 4, "src": [4, 15], "sphinx": 4, "It": [4, 7, 11, 13, 14, 16], "furo": 4, "theme": 4, "custom": [4, 7, 13, 16, 17], "make": [4, 10, 11, 13, 17], "html": 4, "build": [4, 11, 13, 17], "b": 4, "_build": 4, "your": [4, 7, 11, 13, 14, 15, 16, 17], "research": [4, 16], "would": [4, 5, 11, 17], "much": [4, 11], "appreci": 4, "inproceed": 4, "f975f4ce65944e3ea958578003cee622": 4, "titl": [4, 11, 13], "unifi": [4, 9, 10, 11], "framework": [4, 5, 9, 10, 11, 13, 15], "booktitl": 4, "ceur": 4, "workshop": 4, "proceed": [4, 11], "author": 4, "enevoldsen": [4, 11], "kenneth": [4, 11], "hansen": [4, 11], "lass": [4, 11], "nielbo": [4, 11], "kristoff": [4, 11], "l": 4, "date": [4, 11, 13], "volum": 4, "2989": 4, "page": [4, 9, 10, 11, 13], "206": 4, "216": 4, "publish": 4, "issn": 4, "1613": 4, "0073": 4, "keyword": 4, "low": [4, 7], "resourc": [4, 11], "natur": [4, 5, 9, 11], "Or": 4, "prefer": 4, "apa": 4, "paper": [4, 7, 9, 10, 11, 15], "publicli": 4, "here": [4, 7, 11, 13, 16, 17], "preprint": [4, 7, 11], "made": [5, 11, 13], "Its": 5, "largest": [5, 13], "ha": [5, 7, 10, 13], "achiev": [5, 13], "state": [5, 7, 8, 12, 13, 14], "art": [5, 7, 8, 12, 13, 14], "name": [5, 7, 8, 12, 14, 15, 17], "recognit": [5, 7, 8, 12], "part": [5, 7, 8, 9, 11, 12, 15, 16], "speech": [5, 7, 8, 9, 12, 15], "feel": 5, "free": [5, 17], "try": [5, 7, 11, 16, 17], "out": [5, 7, 9, 10, 13, 15, 16], "demo": [5, 7], "materi": 5, "reproduc": [5, 7], "result": [5, 11, 15, 16], "guid": [5, 10, 12], "usag": [5, 9, 12], "furthermor": [5, 16], "behavior": [5, 15], "websit": 5, "well": [5, 7, 10, 11, 12, 13], "introduct": [5, 11], "start": [5, 7, 10, 12, 15, 16, 17], "its": [5, 7, 9, 10, 11, 13, 14, 16, 17], "variou": 5, "featur": 5, "instruct": 5, "tutori": [5, 7, 10, 12, 15, 17], "uisng": 5, "faq": 5, "frequent": 5, "ask": 5, "question": [5, 13], "answer": [5, 13], "bug": [5, 13], "report": [5, 9, 13], "found": [5, 17], "pleas": [5, 10, 11, 13], "issu": [5, 7, 11], "request": 5, "idea": 5, "new": [5, 10, 11], "creat": [5, 11, 14, 16], "do": [5, 7, 10, 11, 13, 17], "about": [5, 13, 16], "discuss": 5, "forum": 5, "gener": [5, 7, 8, 10, 13], "For": [5, 9, 10, 11, 13, 17], "genral": 5, "dicuss": 5, "contribut": 5, "like": [5, 11, 13, 15, 17], "guidelin": 5, "via": 6, "pypi": [6, 7], "2": [7, 11, 13, 15, 16, 17], "7": [7, 11, 13, 17], "15": 7, "05": [7, 11], "23": [7, 11], "small": [7, 10, 11, 13, 15, 17], "larg": [7, 9, 10, 11, 13, 16], "support": [7, 13], "corefer": [7, 12], "resolut": [7, 12], "link": [7, 12], "ddt": [7, 10], "treebank": [7, 15], "trainabl": 7, "lemmat": [7, 12], "notabl": [7, 9, 10, 16, 17], "improv": [7, 13], "intersect": 7, "between": [7, 9, 13, 14, 16, 17], "cdt": 7, "so": [7, 11, 13, 17], "actual": [7, 17], "less": 7, "than": [7, 10, 11, 13], "befor": [7, 13], "annot": [7, 11, 13], "dacoref": 7, "obtain": [7, 9, 10, 13, 14, 15, 16], "morpholog": 7, "84": [7, 11], "91": [7, 11], "95": [7, 11], "89": [7, 11], "reduc": [7, 10, 13], "ner": [7, 9, 10, 11, 12, 15], "down": [7, 13], "87": [7, 11], "38": [7, 11, 15], "we": [7, 9, 10, 11, 12, 13, 14, 15, 16, 17], "recommend": [7, 9, 10, 11, 12, 16], "either": [7, 11, 13], "add_pip": [7, 11, 13, 14, 16, 17], "add": [7, 11, 13, 14, 16, 17], "sota": 7, "scandin": 7, "one": [7, 11, 14, 15, 16], "isn": [7, 9, 17], "t": [7, 9, 13, 14, 16, 17], "great": 7, "yet": [7, 13], "": [7, 9, 10, 11, 13, 14, 15, 16, 17], "precis": 7, "86": [7, 11], "recal": [7, 15], "still": [7, 10, 13, 15, 17], "due": [7, 13, 17], "lack": 7, "knowledg": [7, 11, 13], "consist": [7, 10, 11], "across": [7, 11, 13], "gain": 7, "f1": [7, 11], "81": [7, 11], "79": [7, 11], "85": [7, 11], "82": [7, 11, 13], "acc": 7, "94": [7, 11], "fix": 7, "varieti": [7, 11, 16, 17], "remov": [7, 11], "warn": [7, 13, 15, 16, 17], "annotat": 7, "error": [7, 11, 13, 15, 17], "wa": [7, 13, 17], "never": 7, "space": [7, 11], "need": [7, 13, 15, 16], "what": [7, 10, 11, 14, 16], "next": [7, 17], "better": [7, 9, 10, 13, 15], "current": [7, 9, 13, 14, 17], "qid": [7, 13], "refer": [7, 8, 13, 16], "among": [7, 17], "thing": [7, 13, 17], "main": [7, 11], "examin": [7, 10, 11, 14, 16], "dansk": [7, 8, 10, 13], "6": [7, 11, 13], "10": [7, 13, 16, 17], "04": [7, 13], "three": [7, 9, 11], "let": [7, 10, 14, 15, 16, 17], "up": [7, 16, 17], "18": [7, 13], "differ": [7, 11, 13, 16, 17], "config": [7, 13], "size": [7, 9, 11, 13], "now": [7, 13, 17], "4": [7, 11, 13, 15, 17], "3": [7, 11, 13, 15, 16, 17], "01": [7, 11, 13], "find": [7, 13, 15], "chang": [7, 10, 11, 15, 16], "g": [7, 11, 13, 16], "As": [7, 10, 11, 13, 15], "longer": 7, "allow": [7, 9, 10, 11, 13, 16, 17], "dan": [7, 11, 13], "nielsen": [7, 13], "27": [7, 11], "06": 7, "22": [7, 11, 15], "spandaur": 7, "hate": [7, 12], "detect": [7, 13, 16], "classif": [7, 14, 16], "move": [7, 10], "seper": 7, "versatil": 7, "thei": [7, 11, 13], "through": [7, 13, 15, 17], "extern": 7, "augmenti": [7, 11, 15], "rule": [7, 16], "sentiment": [7, 12], "instead": 7, "asent": [7, 16], "multipl": [7, 11, 15, 16], "thu": [7, 9, 11, 13, 16], "simplifi": 7, "avoid": 7, "emot": [7, 12, 14], "subject": [7, 12, 13], "polar": [7, 12], "classifc": 7, "bugfix": 7, "11": [7, 9], "21": 7, "danlp": [7, 9, 11, 14, 15, 16], "directli": [7, 11], "huggingfac": [7, 13], "hub": 7, "faster": [7, 16], "stabl": 7, "readabl": [7, 10, 17], "modul": [7, 9, 11], "develop": 7, "hlass": 7, "thank": 7, "pr": 7, "dhpullack": 7, "07": 7, "detail": [7, 10, 13, 17], "descript": [7, 11, 13], "got": 7, "brand": [7, 11, 16], "onlin": 7, "And": [7, 12], "prettier": 7, "print": [7, 11, 13, 14, 16], "09": 7, "releas": [7, 11, 13], "first": [7, 10, 11, 12, 13, 15, 16], "few": [7, 10, 15], "design": 7, "code": 7, "behaviour": 7, "both": [7, 11, 14, 15, 16], "beauti": 7, "hand": [7, 11], "drawn": 7, "logo": 7, "offici": 7, "aarhu": [7, 11, 13], "univers": [7, 17], "03": [7, 13], "stunningli": 7, "look": [7, 11, 14, 16, 17], "site": [7, 13, 16, 17], "cover": 7, "99": [7, 11, 16], "codebas": 7, "major": 7, "oper": 7, "system": [7, 13], "just": [7, 13, 17], "linux": 7, "25": [7, 11], "senda": 7, "30": [7, 11], "effici": 7, "\u00e6l\u00e6ctra": [7, 14], "24": 7, "softwar": 7, "sequenc": 7, "introduc": [7, 10, 11], "abov": [7, 9, 10], "02": [7, 11, 13], "launch": 7, "evaul": 8, "comparison": [8, 10], "simpl": [8, 10, 17], "domain": [8, 14, 16], "conll": 8, "2003": 8, "format": [8, 15], "infer": [8, 9, 13], "speed": [8, 9, 13, 14, 17], "compar": [9, 11, 16], "repres": [9, 10, 11], "comprehens": 9, "below": [9, 11], "score": [9, 10, 11, 15, 16, 17], "best": [9, 10, 11, 13], "each": [9, 11, 13], "categori": [9, 11], "highlight": [9, 11], "bold": [9, 11], "second": [9, 11, 13, 15], "underlin": [9, 11], "cell": 9, "stanza": [9, 10], "v": 9, "batch": 9, "howev": [9, 10], "expect": [9, 10, 11], "reach": 9, "updat": [9, 10, 11, 13, 17], "unlabel": 9, "attach": 9, "denot": [9, 11], "percentag": [9, 11, 13], "word": [9, 11, 13, 16, 17], "assign": [9, 13], "correct": [9, 13], "head": [9, 13, 17], "while": [9, 10, 11, 13], "label": [9, 11, 13, 16, 17], "inform": [9, 13, 16, 17], "chapter": 9, "jurafski": 9, "martin": 9, "most": [9, 11, 13], "good": [9, 14], "altern": 9, "especi": [9, 16, 17], "cpu": [9, 17], "might": [9, 10, 11, 16], "consid": [9, 10], "interest": [9, 11, 16], "flair": [9, 10], "viabl": 9, "typic": [9, 11, 16], "benchmark": [9, 11], "tendenc": 9, "feed": [9, 11], "gold": [9, 11], "standard": [9, 11], "easier": [9, 11], "architectur": [9, 10, 11], "inflat": [9, 11], "further": [9, 11], "proberli": [9, 11], "reflect": [9, 11], "realli": [9, 11], "therefor": [9, 11], "own": [9, 10, 14, 17], "polyglot": [9, 11], "remain": 9, "conveni": 9, "give": 9, "quick": [9, 13, 17], "overview": [9, 10, 11, 12, 17], "over": [9, 11, 13], "landscap": 9, "continu": [9, 11, 16, 17], "badg": 9, "rank": 9, "note": [9, 11, 13], "strive": 9, "alwai": 9, "ideal": [9, 10, 11], "often": [9, 11, 13, 14], "too": 9, "strike": 9, "balanc": 9, "select": [9, 11], "candid": 9, "conduct": [10, 11], "thorough": [10, 11], "nuanc": [10, 11], "coupl": 10, "plausibl": 10, "peter": [10, 11], "schmeichel": [10, 11], "mener": [10, 11], "ogs\u00e5": [10, 11], "det": [10, 11, 13, 16], "landshold": [10, 11], "anno": [10, 11], "tilh\u00f8rer": [10, 11], "verdenstoppen": [10, 11], "og": [10, 11, 13], "kan": [10, 11, 13], "vind": [10, 11], "den": [10, 11, 13, 16], "kommend": [10, 11], "kamp": [10, 11], "mod": [10, 11], "england": [10, 11], "ann": [10, 11], "\u00f8stergaard": [10, 11], "ogs\u00e5l": 10, "dansi": 10, "landahold": 10, "2921": 10, "vefdrnstoppen": 10, "vond": 10, "kimmend": 10, "underli": [10, 11], "assumpt": [10, 11], "our": [10, 11, 17], "case": [10, 11, 13, 15, 16], "anna": 10, "verb": [10, 13], "context": [10, 11, 13, 16], "assum": [10, 13, 16], "wors": [10, 11, 16], "certain": 10, "minor": 10, "spell": [10, 11], "variat": [10, 11], "conclud": 10, "vulner": [10, 15], "instanc": [10, 11, 13], "hard": [10, 16], "replac": [10, 11, 15], "\u00e6": [10, 11], "\u00f8": [10, 11], "\u00e5": [10, 11], "ae": [10, 11], "oe": [10, 11], "aa": [10, 11], "histor": [10, 11], "seen": [10, 17], "limit": [10, 11, 13], "reason": [10, 11, 14], "comprehend": 10, "mejer": 10, "ogsp": 10, "landshoof": 10, "anbo": 10, "202q": 10, "tilh\u00e5rer": 10, "gerfenatop0en": 10, "lan": 10, "sen": 10, "kpmkendw": 10, "lamp": 10, "breakdown": 10, "some": [10, 11, 17], "trend": 10, "abbrevi": 10, "lead": [10, 13, 15], "degrad": [10, 13, 17], "larger": [10, 11, 13], "handl": 10, "bilstm": 10, "competit": 10, "under": [10, 11, 13], "outperform": [10, 13], "been": [10, 11, 13, 17], "togeth": [10, 13, 17], "easili": [10, 13], "accuraci": [10, 11, 13], "long": 10, "had": 10, "around": 10, "97": [10, 11], "98": [10, 11], "argu": 10, "man": 10, "2011": 10, "greater": 10, "rather": [10, 13], "la": 10, "besid": 10, "googl": 10, "colab": 10, "yourself": 10, "compet": 11, "notebook": 11, "open": 11, "replic": 11, "hvingelbi": 11, "et": [11, 13, 16], "al": 11, "2020": [11, 13], "scheme": 11, "sang": 11, "de": 11, "meulder": 11, "organ": [11, 13], "miscellaneu": 11, "similar": [11, 16, 17], "ontonot": 11, "weischedel": 11, "2013": 11, "16": 11, "plank": 11, "nest": 11, "universitet": 11, "exam": 11, "known": [11, 13], "normal": [11, 13, 17], "prior": 11, "raw": 11, "fed": 11, "loc": [11, 13], "citi": [11, 13], "road": 11, "mountain": [11, 13], "public": 11, "commerci": 11, "place": 11, "meet": 11, "point": [11, 17], "abstract": 11, "fiction": [11, 13], "charact": [11, 15], "anim": 11, "alias": 11, "org": [11, 13], "summar": [11, 13], "sort": [11, 17], "collect": [11, 13, 17], "rang": [11, 13], "compani": [11, 13], "polit": [11, 13, 16], "movement": 11, "government": 11, "bodi": [11, 13], "club": 11, "broad": 11, "event": [11, 13], "religion": [11, 14], "kendt": 11, "russisk": 11, "historiker": 11, "andronik": 11, "mirganjan": 11, "igor": 11, "klamkin": 11, "tror": [11, 16], "ikk": [11, 13, 16], "rusland": 11, "udvikl": 11, "uden": [11, 13], "en": [11, 13, 14, 16], "quot": 11, "jernn\u00e6v": 11, "confid": 11, "interv": 11, "calcul": [11, 15, 17], "bootstrap": 11, "500": [11, 17], "sampl": [11, 17], "averag": [11, 17], "da_dacy_large_trf": [11, 13], "88": 11, "9": [11, 13], "92": 11, "72": 11, "70": 11, "8": [11, 13, 15], "78": 11, "71": 11, "da_dacy_small_trf": [11, 13, 17], "75": 11, "69": 11, "68": 11, "saattrupdan": 11, "nbailab": 11, "scandi": 11, "83": 11, "93": 11, "80": 11, "73": 11, "alexandrainst": 11, "da": [11, 13, 14, 16], "66": [11, 15], "77": 11, "90": 11, "64": 11, "57": 11, "da_core_news_trf": 11, "61": 11, "da_core_news_lg": 11, "74": 11, "62": 11, "54": 11, "55": 11, "da_core_news_md": 11, "76": 11, "58": 11, "49": 11, "52": 11, "da_core_news_sm": [11, 15], "59": 11, "39": 11, "67": 11, "openai": 11, "gpt": 11, "turbo": 11, "50": 11, "41": 11, "47": 11, "63": 11, "65": 11, "worth": 11, "mention": 11, "similarli": [11, 16], "independ": 11, "strength": 11, "weak": 11, "multi": 11, "mani": 11, "those": [11, 15], "On": 11, "scandinavian": 11, "relev": 11, "strict": [11, 13, 17], "exclud": [11, 13], "gigaword": 11, "derczynski": 11, "wide": 11, "convers": [11, 13], "legal": 11, "social": [11, 16], "media": [11, 16], "web": 11, "content": 11, "wiki": 11, "book": [11, 13], "norp": [11, 13], "nation": [11, 13], "religi": [11, 13], "group": [11, 13, 14, 15], "facil": [11, 13], "airport": [11, 13], "highwai": [11, 13], "bridg": [11, 13], "etc": [11, 13], "agenc": [11, 13], "institut": [11, 13], "gpe": [11, 13], "countri": [11, 13], "non": [11, 13, 16], "water": [11, 13], "product": [11, 13, 16], "vehicl": [11, 13], "weapon": [11, 13], "food": [11, 13], "servic": [11, 13], "hurrican": [11, 13], "battl": [11, 13], "war": [11, 13], "sport": [11, 13], "work": [11, 13, 16], "OF": [11, 13], "song": [11, 13], "law": [11, 13], "concept": [11, 13], "absolut": [11, 13], "rel": [11, 13], "period": [11, 13], "smaller": [11, 13], "dai": [11, 13], "percent": [11, 13], "monei": [11, 13], "monetari": [11, 13], "valu": [11, 13], "unit": [11, 13], "quantiti": [11, 13], "weight": [11, 13], "distanc": [11, 13, 17], "ordin": [11, 13], "cardin": [11, 13], "numer": [11, 13], "fall": [11, 13], "anoth": [11, 13], "opt": 11, "interact": 11, "chart": 11, "quickli": 11, "becom": [11, 16], "unruli": 11, "hover": 11, "dot": 11, "exact": [11, 15], "100": [11, 13, 17], "nbsp": 11, "43": 11, "56": 11, "46": 11, "36": 11, "60": 11, "51": 11, "45": 11, "13": 11, "37": 11, "53": [11, 15], "48": 11, "96": 11, "da_dacy_": 11, "_ner_fine_grain": 11, "convert": 11, "geo": 11, "were": 11, "divers": 11, "encapsul": 11, "adject": 11, "2004": 11, "world": [11, 13], "cup": [11, 17], "italian": 11, "ci": 11, "df": [11, 17], "dannet": 11, "ignor": 11, "notnul": 11, "round": 11, "astyp": 11, "lower": [11, 15], "upper": 11, "drop": [11, 15], "axi": 11, "inplac": 11, "filter": 11, "set_index": 11, "latex": 11, "styler": 11, "format_index": 11, "escap": 11, "to_latex": 11, "hrule": 11, "convert_css": 11, "begin": 11, "tabular": 11, "lllll": 11, "toprul": 11, "midrul": 11, "_daci": 11, "_larg": 11, "_trf": 11, "_medium": 11, "42": 11, "34": 11, "_small": 11, "35": 11, "_ner": 11, "_fine": 11, "_grain": 11, "_core": 11, "_new": 11, "44": 11, "_lg": 11, "32": 11, "_md": 11, "28": [11, 17], "_sm": 11, "29": 11, "26": 11, "17": 11, "bottomrul": 11, "end": 11, "approach": [11, 16], "initi": 11, "short": [11, 13], "repetit": 11, "baselin": 11, "intend": 11, "inconsist": 11, "ocr": 11, "keystrok": [11, 15], "close": 11, "keyboard": [11, 15], "swap": 11, "two": [11, 13, 14, 15, 16, 17], "neighbor": 11, "simul": 11, "randomli": 11, "capit": [11, 13], "lowercas": 11, "synonym": 11, "slight": 11, "grammat": [11, 13], "wordnet": 11, "respect": 11, "syntact": [11, 13], "role": [11, 13], "embed": 11, "tend": [11, 17], "appear": 11, "ascii": 11, "letter": 11, "uppercas": 11, "noun": [11, 12], "target": 11, "sometim": 11, "singl": [11, 13, 15, 16], "hun": 11, "l\u00e6ste": 11, "g\u00e5den": 11, "she": 11, "puzzl": 11, "l\u00f8ste": 11, "solv": 11, "quit": 11, "mean": [11, 13, 17], "degre": 11, "31": 11, "know": [11, 17], "why": 11, "choos": [11, 15, 16], "One": [11, 14, 16], "wp": 11, "appl": 11, "m1": 11, "pro": 11, "16gb": 11, "maco": 11, "high": [11, 17], "consum": 11, "laptop": 11, "higher": 11, "benefit": 11, "total": 11, "sec": 11, "1438": 11, "353": 11, "770": 11, "2024": 11, "da_dacy_large_ner_fine_grain": [11, 13], "567": 11, "da_dacy_medium_ner_fine_grain": [11, 13], "1670": 11, "00": [11, 13], "da_dacy_small_ner_fine_grain": [11, 13, 17], "5717": 11, "1618": 11, "19": [11, 13], "1125": 11, "31364": 11, "32571": 11, "34624": 11, "da_core_news_": 11, "onc": [11, 13], "leon": 11, "manuel": 11, "r": [11, 17], "ciosici": 11, "rebekah": 11, "baglini": 11, "morten": 11, "h": 11, "christiansen": 11, "jacob": 11, "aarup": 11, "dalsgaard": 11, "riccardo": 11, "fusaroli": 11, "juel": 11, "henrichsen": 11, "rasmu": 11, "andrea": 11, "kirked": 11, "alex": 11, "kjeldsen": 11, "23rd": 11, "nordic": 11, "confer": 11, "linguist": 11, "nodalida": 11, "413": [11, 16], "421": 11, "arxiv": 11, "2107": 11, "05295": 11, "amali": 11, "brogaard": 11, "pauli": 11, "maria": 11, "barrett": 11, "christina": 11, "rost": 11, "malm": 11, "lidegaard": 11, "ander": 11, "gaard": 11, "12th": 11, "4597": 11, "4604": 11, "barbara": 11, "kristian": 11, "n": 11, "rgaard": 11, "jensen": 11, "rob": 11, "van": 11, "der": [11, 16], "goot": 11, "lexic": 11, "2105": 11, "11301": 11, "erik": 11, "f": [11, 13, 16], "fien": 11, "share": 11, "c": [11, 17], "0306050": 11, "ralph": 11, "martha": 11, "palmer": 11, "mitchel": 11, "marcu": 11, "eduard": 11, "hovi": 11, "sameer": 11, "pradhan": 11, "lanc": 11, "ramshaw": 11, "nianwen": 11, "xue": 11, "taylor": 11, "jeff": 11, "kaufman": 11, "michel": 11, "franchini": 11, "ldc2013t19": 11, "consortium": 11, "philadelphia": 11, "pa": 11, "unfamiliar": 12, "segment": 12, "chunk": 12, "accec": 12, "built": 13, "same": [13, 17], "structur": 13, "familiar": [13, 17], "easi": [13, 15], "don": [13, 17], "worri": 13, "trf": 13, "increas": 13, "user": [13, 15], "becaus": [13, 17], "writeabl": 13, "co": 13, "chcaa": 13, "resolv": 13, "0eadea074d5f637e76357c46bbd56451471d0154": 13, "py3": 13, "whl": 13, "101": 13, "mb": 13, "25l": 13, "90m": 13, "0m": 13, "32m0": 13, "31m": 13, "eta": 13, "36m": 13, "2k": 13, "91m": 13, "32m4": 13, "31m145": 13, "36m0": 13, "32m11": 13, "31m178": 13, "32m17": 13, "31m185": 13, "32m23": 13, "31m188": 13, "32m30": 13, "32m36": 13, "31m187": 13, "32m43": 13, "31m186": 13, "32m49": 13, "32m56": 13, "32m62": 13, "31m189": 13, "32m69": 13, "32m75": 13, "32m82": 13, "32m88": 13, "32m94": 13, "32m101": 13, "31m196": 13, "31m37": 13, "25h": 13, "successfulli": 13, "home": [13, 16, 17], "runner": [13, 16, 17], "local": [13, 16, 17], "lib": [13, 16, 17], "python3": [13, 16, 17], "910": [13, 17], "userwarn": [13, 16, 17], "w095": [13, 17], "v3": [13, 17], "mai": [13, 17], "compat": [13, 17], "newer": [13, 17], "retrain": [13, 17], "warn_msg": [13, 17], "spacy_transform": [13, 17], "layer": [13, 17], "hf_shim": [13, 17], "137": [13, 17], "save": [13, 17], "torch": [13, 17], "state_dict": [13, 17], "transform": [13, 15, 17], "attempt": [13, 17], "fallback": [13, 17], "exactli": [13, 17], "convent": 13, "syntax": 13, "henc": 13, "abl": 13, "lot": 13, "veri": [13, 15], "written": 13, "pakken": 13, "er": [13, 16], "hurtig": 13, "effektiv": 13, "til": 13, "sprogprocess": 13, "identifi": 13, "real": [13, 17], "object": [13, 14, 15, 16], "recogn": 13, "miscellan": 13, "label_": 13, "plot": [13, 17], "displaci": 13, "render": 13, "nil": 13, "q35": 13, "sinc": 13, "access": 13, "switch": 13, "simpli": [13, 14, 15, 16, 17], "blank": [13, 14, 16], "31m139": 13, "32m10": 13, "31m175": 13, "32m16": 13, "31m179": 13, "32m29": 13, "32m35": 13, "31m171": 13, "32m41": 13, "31m176": 13, "32m47": 13, "32m53": 13, "32m59": 13, "31m173": 13, "32m65": 13, "32m71": 13, "31m177": 13, "32m77": 13, "32m83": 13, "32m89": 13, "32m95": 13, "31m180": 13, "spacy_wrap": [13, 14, 16], "pipeline_component_tok_clf": 13, "tokenclassificationtransform": 13, "0x7f64fd33d780": 13, "denn": 13, "tr\u00e6net": 13, "af": 13, "fra": 13, "alexandra": 13, "instituttet": 13, "additon": 13, "slow": 13, "saw": 13, "uniqu": 13, "done": [13, 17], "u": [13, 15, 16, 17], "barack": 13, "obama": 13, "wikipedia": 13, "wikidata": 13, "disambigu": 13, "though": 13, "term": 13, "could": [13, 16, 17], "distinguish": 13, "fulli": 13, "expand": 13, "unknown": 13, "correspondig": 13, "rutechef": 13, "ivan": 13, "madsen": 13, "jeg": [13, 16], "ved": 13, "hvorfor": 13, "q830350": 13, "q16876242": 13, "famili": 13, "believ": 13, "incorrect": 13, "last": 13, "full": 13, "slhave": 13, "neural": [13, 16], "match": 13, "specifc": 13, "combin": 13, "english": 13, "client": 13, "dron": 13, "bor": 13, "k\u00f8benhavn": 13, "32m3": 13, "31m114": 13, "32m8": 13, "31m131": 13, "32m14": 13, "31m150": 13, "32m19": 13, "31m151": 13, "31m138": 13, "32m28": 13, "31m129": 13, "32m33": 13, "32m39": 13, "31m157": 13, "32m44": 13, "31m156": 13, "31m155": 13, "32m55": 13, "32m60": 13, "32m70": 13, "32m76": 13, "32m81": 13, "32m86": 13, "32m91": 13, "31m144": 13, "32m97": 13, "31m158": 13, "31m34": 13, "kb_id_": 13, "wikidata_entri": 13, "q1748": 13, "northern": 13, "europ": 13, "nordeurop\u00e6isk": 13, "land": 13, "denmark": 13, "hovedstad": 13, "imag": 13, "associ": 13, "articl": 13, "class": [13, 17], "fashion": 13, "32m5": 13, "31m164": 13, "32m18": 13, "32m24": 13, "32m31": 13, "32m37": 13, "31m184": 13, "31m183": 13, "32m50": 13, "32m63": 13, "31m191": 13, "31m38": 13, "43fedc5a1b1c1d193f461d13225f217f2ced507d": 13, "31m4": 13, "31m8": 13, "32m2": 13, "31m20": 13, "31m32": 13, "31m51": 13, "32m12": 13, "31m117": 13, "31m125": 13, "32m22": 13, "31m163": 13, "32m40": 13, "31m174": 13, "32m45": 13, "31m172": 13, "31m161": 13, "31m166": 13, "32m68": 13, "31m170": 13, "32m74": 13, "32m80": 13, "31m5": 13, "25hinstal": 13, "entityrecogn": 13, "0x7f64fc90a8f0": 13, "samt": 13, "andr": 13, "blev": 13, "d": 13, "mart": 13, "center": 13, "humant": 13, "kommun": 13, "after": 13, "statist": 13, "enabl": 13, "predict": [13, 14, 16], "produc": 13, "enough": 13, "pos_": 13, "cconj": 13, "num": 13, "pron": 13, "aux": 13, "adv": 13, "adj": 13, "adp": 13, "propn": 13, "phrase": [13, 16], "relat": [13, 15], "brown": 13, "fox": 13, "jump": 13, "lazi": 13, "dog": 13, "nsubj": 13, "nomin": 13, "fast": 13, "accur": 13, "parser": [13, 17], "tree": 13, "fritekst": 13, "cop": 13, "amod": 13, "nmod": 13, "separ": 13, "punctuat": 13, "s\u00e6tning": 13, "vigtig": 13, "del": 13, "bl": 13, "benytt": 13, "opdel": 13, "lang": 13, "tekster": 13, "mindr": 13, "bidder": 13, "mist": 13, "meningen": 13, "hvert": 13, "sent": 13, "flat": 13, "big": [13, 17], "yellow": 13, "taxi": 13, "pronoun": 13, "proper": 13, "nc": 13, "noun_chunk": 13, "inflect": 13, "analys": 13, "item": 13, "ran": 13, "machin": 13, "learn": 13, "normalis": 13, "tekst": 13, "v\u00e6re": 13, "god": [13, 14], "id\u00e9": 13, "lemma_": 13, "kunn": 13, "express": [13, 14], "chase": 13, "ball": 13, "shini": 13, "agent": 13, "chatbot": 13, "semant": 13, "represent": 13, "experiment": 13, "novemb": 13, "fik": 13, "minkavl": 13, "hen": 13, "christensen": 13, "hele": 13, "familien": 13, "chok": 13, "efter": 13, "pressem\u00f8d": 13, "han": 13, "vide": 13, "mink": 13, "skull": 13, "afliv": 13, "derm": 13, "fjernet": 13, "livsgrundlag": 13, "cluster": 13, "coref_clusters_1": 13, "encourag": 14, "violenc": 14, "toward": 14, "someth": 14, "race": 14, "sex": 14, "sexual": 14, "orient": 14, "tool": [14, 17], "incorper": 14, "wether": [14, 16], "laden": [14, 16], "classifi": [14, 16, 17], "creator": 14, "hatespeech_detect": 14, "facebook": 14, "offens": 14, "hatespeech_classif": 14, "s\u00e6rlig": 14, "opm\u00e6rksomh": 14, "personangreb": 14, "sprogbrug": 14, "spam": [14, 17], "indhold": 14, "bert": [14, 15, 16], "botxo": 14, "There": 14, "chosen": 14, "trade": 14, "off": 14, "ttack": 14, "electra": 14, "hatespeech": 14, "guscod": 14, "dkbert": 14, "pipeline_component_seq_clf": [14, 16], "sequenceclassificationtransform": [14, 16], "0x7f7c57d48220": 14, "wil": [14, 16], "is_offens": 14, "hate_speech_typ": 14, "emotion": [14, 16], "_prob": [14, 16], "suffix": [14, 16], "probabilit": [14, 16], "senil": 14, "gaml": 14, "idiot": 14, "hej": 14, "har": [14, 16], "du": 14, "haft": 14, "dag": 14, "pipe": [14, 16, 17], "_": [14, 16], "walk": 15, "evalut": 15, "ll": [15, 17], "lastli": 15, "spacy_smal": 15, "dacy_smal": 15, "straightforward": 15, "scorer": 15, "nice": 15, "spacy_baselin": 15, "dacy_baselin": 15, "wall_tim": 15, "ents_p": 15, "ents_r": 15, "ents_f": 15, "ents_per_type_loc_p": 15, "ents_per_type_loc_r": 15, "ents_per_type_loc_f": 15, "ents_per_type_misc_p": 15, "ents_per_type_misc_r": 15, "ents_per_type_misc_f": 15, "ents_per_type_per_f": 15, "ents_per_type_org_p": 15, "ents_per_type_org_r": 15, "ents_per_type_org_f": 15, "ents_excl_misc_ents_p": 15, "ents_excl_misc_ents_r": 15, "ents_excl_misc_ents_f": 15, "862225": 15, "685598": 15, "605735": 15, "643197": 15, "571429": 15, "666667": 15, "615385": 15, "628571": 15, "545455": 15, "584071": 15, "798898": 15, "677419": 15, "391304": 15, "496063": 15, "701031": 15, "622426": 15, "659394": 15, "947658": 15, "row": [15, 17], "column": [15, 17], "808233": 15, "82852": 15, "822581": 15, "82554": 15, "767241": 15, "927083": 15, "839623": 15, "764706": 15, "752066": 15, "758333": 15, "920904": 15, "720497": 15, "75817": 15, "845977": 15, "842105": 15, "844037": 15, "978324": 15, "978972": 15, "create_per_replace_augmenter_v1": 15, "lower_aug": 15, "level": 15, "female_name_dict": 15, "random": 15, "keep": 15, "force_pattern_s": 15, "pattern": 15, "firstnam": 15, "lastnam": 15, "female_aug": 15, "spacy_aug": 15, "dacy_aug": 15, "au561649": 15, "futurewarn": 15, "Not": 15, "prepend": 15, "index": 15, "futur": 15, "preserv": 15, "previou": 15, "groupbi": 15, "group_kei": 15, "adopt": 15, "silenc": 15, "lambda": 15, "x": [15, 17], "sum": 15, "pd": 15, "concat": 15, "873839": 15, "695652": 15, "286738": 15, "406091": 15, "687500": 15, "343750": 15, "458333": 15, "720000": 15, "446281": 15, "551020": 15, "412451": 15, "124224": 15, "209424": 15, "683871": 15, "242563": 15, "358108": 15, "922885": 15, "699737": 15, "828520": 15, "800000": 15, "758170": 15, "315962": 15, "607143": 15, "213262": 15, "31565": 15, "218750": 15, "330709": 15, "490566": 15, "429752": 15, "458150": 15, "245283": 15, "740741": 15, "212766": 15, "744444": 15, "153318": 15, "254269": 15, "933873": 15, "931722": 15, "710288": 15, "suffer": 15, "effect": 15, "stochast": 15, "create_keystroke_error_augmenter_v1": 15, "key_05_aug": 15, "da_qwerty_v1": 15, "spacy_kei": 15, "173135": 15, "096026": 15, "103943": 15, "099828": 15, "109890": 15, "104167": 15, "106952": 15, "060811": 15, "074380": 15, "066914": 15, "141732": 15, "073171": 15, "074534": 15, "073846": 15, "107456": 15, "112128": 15, "109742": 15, "326630": 15, "117777": 15, "116949": 15, "123656": 15, "120209": 15, "145631": 15, "156250": 15, "150754": 15, "073770": 15, "181818": 15, "066298": 15, "070175": 15, "128480": 15, "137300": 15, "132743": 15, "319308": 15, "094923": 15, "097603": 15, "102151": 15, "099825": 15, "060000": 15, "062500": 15, "061224": 15, "063830": 15, "068702": 15, "153439": 15, "089655": 15, "080745": 15, "084967": 15, "108352": 15, "109840": 15, "109091": 15, "321187": 15, "070100": 15, "123539": 15, "132616": 15, "127917": 15, "134831": 15, "125000": 15, "129730": 15, "080645": 15, "082645": 15, "081633": 15, "153465": 15, "129630": 15, "130435": 15, "130031": 15, "134737": 15, "146453": 15, "140351": 15, "313382": 15, "069810": 15, "099831": 15, "105735": 15, "102698": 15, "104762": 15, "114583": 15, "109453": 15, "033613": 15, "033058": 15, "033333": 15, "172973": 15, "067797": 15, "071006": 15, "116525": 15, "125858": 15, "121012": 15, "315617": 15, "manner": 15, "dacy_paper_repl": 15, "script": 15, "opinion": 16, "mine": 16, "determin": 16, "posit": 16, "neg": 16, "neutral": 16, "busi": 16, "monitor": 16, "feedback": 16, "rate": 16, "emploi": 16, "negat": 16, "postiv": 16, "counterpart": 16, "complex": [16, 17], "sarcasm": 16, "clear": 16, "consider": 16, "suitabl": 16, "europarl": 16, "twitter": 16, "No": 16, "gl\u00e6de": 16, "sindsro": 16, "tillid": 16, "accept": 16, "asent_da_v1": 16, "microblog": 16, "bertton": 16, "0x7feab17aa980": 16, "subjectivity_prob": 16, "analysen": 16, "viser": 16, "\u00f8konomien": 16, "bliver": 16, "forf\u00e6rdelig": 16, "d\u00e5rlig": 16, "alligevel": 16, "godt": 16, "prob": 16, "arrai": 16, "dtype": [16, 17], "float32": 16, "positv": 16, "0x7feab17a9360": 16, "polarity_prob": 16, "002": 16, "008": 16, "981": 16, "019": 16, "happi": 16, "trust": 16, "forventn": 16, "interres": 16, "overasket": 16, "m\u00e5ll\u00f8": 16, "surpris": 16, "vrede": 16, "irrit": 16, "anger": 16, "foragt": 16, "modvilj": 16, "contempt": 16, "sorg": 16, "trist": 16, "sad": 16, "frygt": 16, "bekymret": 16, "fear": 16, "emotionally_laden": 16, "emotian": 16, "wrapped_model": 16, "143": 16, "0x7feab1424700": 16, "ej": 16, "bil": 16, "s\u00e5\u00e5": 16, "flot": 16, "fuck": 16, "bare": 16, "s\u00e5": 16, "tr\u00e6l": 16, "tesla": 16, "landet": 16, "raket": 16, "p\u00e5": 16, "m\u00e5nen": 16, "vildt": 16, "tr\u00e6": 16, "haven": 16, "dependend": 16, "mega": 16, "glad": 16, "scale": 16, "neu": 16, "587": 16, "compound": 16, "5448": 16, "n_sentenc": [16, 17], "valenc": 16, "account": 16, "is_neg": 16, "516": 16, "visual": 16, "excel": 16, "intensifi": 16, "afinn": 16, "sentida": 16, "power": 17, "sm": 17, "5572": 17, "messag": 17, "categor": 17, "ham": 17, "estut": 17, "notic": 17, "hopefulli": 17, "inspir": 17, "bit": 17, "load_sms_data": 17, "go": 17, "until": 17, "jurong": 17, "crazi": 17, "ok": 17, "lar": 17, "joke": 17, "wif": 17, "oni": 17, "entri": 17, "wkly": 17, "comp": 17, "win": 17, "fa": 17, "fina": 17, "dun": 17, "sai": 17, "earli": 17, "hor": 17, "nah": 17, "think": 17, "he": 17, "goe": 17, "usf": 17, "live": 17, "aro": 17, "value_count": 17, "4825": 17, "747": 17, "count": 17, "int64": 17, "procedur": 17, "dependency_dist": 17, "dependencydist": 17, "0x7f695404b9d0": 17, "whenev": 17, "subsampl": 17, "td": 17, "extract_df": 17, "include_text": 17, "join": 17, "origin": 17, "left": 17, "flesch_reading_eas": 17, "flesch_kincaid_grad": 17, "smog": 17, "gunning_fog": 17, "automated_readability_index": 17, "coleman_liau_index": 17, "lix": 17, "rix": 17, "syllables_per_token_std": 17, "n_token": 17, "n_unique_token": 17, "proportion_unique_token": 17, "n_charact": 17, "dependency_distance_mean": 17, "dependency_distance_std": 17, "prop_adjacent_dependency_relation_mean": 17, "prop_adjacent_dependency_relation_std": 17, "5118": 17, "drive": 17, "nan": 17, "2375": 17, "thanx": 17, "2dai": 17, "goodmat": 17, "ur": 17, "rite": 17, "3688": 17, "tonight": 17, "2349": 17, "yar": 17, "els": 17, "thk": 17, "funni": 17, "4988": 17, "tell": 17, "me": 17, "coulda": 17, "val": 17, "That": 17, "With": 17, "sens": 17, "distribut": 17, "seaborn": 17, "sn": 17, "boxplot": 17, "y": 17, "ax": 17, "xlabel": 17, "ylabel": 17, "correl": 17, "strongli": 17, "encod": 17, "boolean": 17, "is_ham": 17, "metrics_correl": 17, "corrwith": 17, "sort_valu": 17, "ab": 17, "ascend": 17, "186524": 17, "183163": 17, "syllables_per_token_mean": 17, "178149": 17, "169049": 17, "166020": 17, "156076": 17, "134804": 17, "091524": 17, "token_length_median": 17, "075341": 17, "074477": 17, "float64": 17, "pretti": 17, "shorter": 17, "simpler": 17, "kdeplot": 17, "hue": 17, "fill": 17, "densiti": 17, "cool": 17, "ve": 17, "step": 17}, "objects": {"dacy.datasets": [[1, 0, 0, "-", "dane"], [1, 0, 0, "-", "names"]], "dacy.datasets.dane": [[1, 1, 1, "", "dane"]], "dacy.datasets.names": [[1, 1, 1, "", "danish_names"], [1, 1, 1, "", "female_names"], [1, 1, 1, "", "load_names"], [1, 1, 1, "", "male_names"], [1, 1, 1, "", "muslim_names"]], "dacy": [[2, 0, 0, "-", "download"], [2, 0, 0, "-", "load"]], "dacy.download": [[2, 1, 1, "", "download_model"], [2, 1, 1, "", "get_latest_version"], [2, 1, 1, "", "install"], [2, 1, 1, "", "models"]], "dacy.load": [[2, 1, 1, "", "load"], [2, 1, 1, "", "models"], [2, 1, 1, "", "where_is_my_dacy"]], "dacy.score": [[3, 0, 0, "-", "input_length"], [3, 0, 0, "-", "score"]], "dacy.score.input_length": [[3, 1, 1, "", "n_sents_score"]], "dacy.score.score": [[3, 1, 1, "", "no_misc_getter"], [3, 1, 1, "", "score"]]}, "objtypes": {"0": "py:module", "1": "py:function"}, "objnames": {"0": ["py", "module", "Python module"], "1": ["py", "function", "Python function"]}, "titleterms": {"advanc": 0, "tutori": 0, "dataset": 1, "daci": [1, 2, 3, 5, 12, 17], "dane": [1, 11], "name": [1, 9, 10, 11, 13], "download": 2, "load": [2, 15], "score": 3, "input_length": 3, "faq": 4, "how": 4, "do": 4, "i": [4, 9], "test": 4, "code": 4, "document": 4, "gener": [4, 11], "cite": 4, "thi": 4, "work": 4, "instal": [6, 15], "new": 7, "updat": 7, "perform": [8, 9, 11, 15], "state": [9, 11], "art": [9, 11], "comparison": [9, 11], "want": 9, "see": 9, "more": [9, 16], "metric": [9, 17], "entiti": [9, 10, 11, 13], "recognit": [9, 10, 11, 13], "what": 9, "la": 9, "ua": 9, "measur": [9, 11], "robust": [10, 11, 15], "bias": [10, 11, 15], "exampl": [10, 11], "origin": [10, 11], "femal": [10, 11], "augment": [10, 11], "5": 10, "keystrok": 10, "error": 10, "15": 10, "keytyp": 10, "part": [10, 13], "speech": [10, 13, 14], "tag": [10, 13], "depend": [10, 13], "pars": [10, 13], "simpl": 11, "you": 11, "ar": 11, "miss": 11, "model": [11, 14, 15, 16], "dansk": 11, "fine": [11, 13], "grain": [11, 13], "domain": 11, "us": [11, 12, 17], "conll": 11, "2003": 11, "format": 11, "infer": 11, "speed": 11, "gpu": 11, "acceler": 11, "refer": 11, "get": 13, "start": 13, "link": 13, "beta": 13, "featur": 13, "ner": 13, "sentenc": 13, "segment": 13, "noun": 13, "chunk": 13, "lemmat": 13, "corefer": 13, "resolut": 13, "hate": 14, "other": [14, 16], "detect": 14, "usag": 14, "evalu": 15, "packag": 15, "data": [15, 17], "estim": 15, "sentiment": 16, "analysi": [16, 17], "overview": 16, "subject": 16, "polar": 16, "emot": 16, "dictionari": 16, "base": 16, "learn": 16, "resourc": 16, "extract": 17, "from": 17, "text": 17, "textdescript": 17, "ad": 17, "compon": 17, "exploratori": 17}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 8, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.viewcode": 1, "sphinxcontrib.bibtex": 9, "sphinx": 57}, "alltitles": {"Advanced Tutorials": [[0, "advanced-tutorials"]], "Datasets": [[1, "datasets"]], "dacy.datasets.dane": [[1, "module-dacy.datasets.dane"]], "dacy.datasets.names": [[1, "module-dacy.datasets.names"]], "Downloading and Loading": [[2, "downloading-and-loading"]], "dacy.download": [[2, "module-dacy.download"]], "dacy.load": [[2, "module-dacy.load"]], "Score": [[3, "score"]], "dacy.score.score": [[3, "dacy-score-score"]], "dacy.score.input_length": [[3, "dacy-score-input-length"]], "FAQ": [[4, "faq"]], "How do I test the code?": [[4, "how-do-i-test-the-code"]], "How is the documentation generated?": [[4, "how-is-the-documentation-generated"]], "How do I cite this work?": [[4, "how-do-i-cite-this-work"]], "DaCy": [[5, "dacy"]], "Installation": [[6, "installation"]], "News and Updates": [[7, "news-and-updates"]], "Performance": [[8, "performance"]], "State of the Art Comparison": [[9, "state-of-the-art-comparison"]], "Want to see more performance metrics for Named entity recognition?": [[9, null]], "What is LAS and UAS?": [[9, null]], "Measuring Performance": [[9, null], [11, null]], "State-of-the-Art": [[9, "state-of-the-art"]], "Robustness and Biases": [[10, "robustness-and-biases"]], "Example": [[10, null], [11, null]], "Original": [[10, null], [11, null]], "Female name augmentation": [[10, null], [11, null]], "5% keystroke errors": [[10, null]], "15% keytype errors": [[10, null]], "Named entity recognition": [[10, "named-entity-recognition"]], "Part-of-speech tagging": [[10, "part-of-speech-tagging"]], "Dependency parsing": [[10, "dependency-parsing"]], "Named Entity Recognition": [[11, "named-entity-recognition"], [13, "named-entity-recognition"]], "State-of-the-Art comparison": [[11, "state-of-the-art-comparison"]], "DaNE: Simple Named Entity Recognition": [[11, "dane-simple-named-entity-recognition"]], "You are missing a model": [[11, null]], "DANSK: Fine-grained Named Entity Recognition": [[11, "dansk-fine-grained-named-entity-recognition"]], "Domain Generalization": [[11, "domain-generalization"]], "Domain generalization using CoNLL-2003 format": [[11, "domain-generalization-using-conll-2003-format"]], "Biases": [[11, "biases"]], "Robustness": [[11, "robustness"]], "Inference Speed": [[11, "inference-speed"]], "GPU Acceleration": [[11, null]], "References": [[11, "references"]], "Using DaCy": [[12, "using-dacy"]], "Getting started": [[13, "getting-started"]], "Named Entity Linking": [[13, "named-entity-linking"]], "Beta feature": [[13, null], [13, null]], "Fine-grained NER": [[13, "fine-grained-ner"]], "Parts-of-speech Tagging": [[13, "parts-of-speech-tagging"]], "Dependency Parsing": [[13, "dependency-parsing"]], "Sentence Segmentation": [[13, "sentence-segmentation"]], "Noun Chunks": [[13, "noun-chunks"]], "Lemmatization": [[13, "lemmatization"]], "Coreference Resolution": [[13, "coreference-resolution"]], "Hate Speech": [[14, "hate-speech"]], "Other models for Hate Speech detection": [[14, null]], "Usage": [[14, "usage"]], "Evaluating Robustness": [[15, "evaluating-robustness"]], "Installing packages": [[15, "installing-packages"]], "Loading models and data": [[15, "loading-models-and-data"]], "Estimating performance": [[15, "estimating-performance"]], "Estimating robustness and biases": [[15, "estimating-robustness-and-biases"]], "Sentiment Analysis": [[16, "sentiment-analysis"]], "Overview of Sentiment Models": [[16, "overview-of-sentiment-models"]], "Subjectivity": [[16, "subjectivity"]], "Polarity": [[16, "polarity"]], "Emotion": [[16, "emotion"]], "Dictionary-Based Sentiment": [[16, "dictionary-based-sentiment"]], "Learn more": [[16, null]], "Other resources": [[16, null]], "Extracting Metrics from text using TextDescriptives": [[17, "extracting-metrics-from-text-using-textdescriptives"]], "Data": [[17, "data"]], "Adding TextDescriptives components to DaCy": [[17, "adding-textdescriptives-components-to-dacy"]], "Exploratory Data Analysis": [[17, "exploratory-data-analysis"]]}, "indexentries": {"dacy.datasets.dane": [[1, "module-dacy.datasets.dane"]], "dacy.datasets.names": [[1, "module-dacy.datasets.names"]], "dane() (in module dacy.datasets.dane)": [[1, "dacy.datasets.dane.dane"]], "danish_names() (in module dacy.datasets.names)": [[1, "dacy.datasets.names.danish_names"]], "female_names() (in module dacy.datasets.names)": [[1, "dacy.datasets.names.female_names"]], "load_names() (in module dacy.datasets.names)": [[1, "dacy.datasets.names.load_names"]], "male_names() (in module dacy.datasets.names)": [[1, "dacy.datasets.names.male_names"]], "module": [[1, "module-dacy.datasets.dane"], [1, "module-dacy.datasets.names"], [2, "module-dacy.download"], [2, "module-dacy.load"], [3, "module-dacy.score.input_length"], [3, "module-dacy.score.score"]], "muslim_names() (in module dacy.datasets.names)": [[1, "dacy.datasets.names.muslim_names"]], "dacy.download": [[2, "module-dacy.download"]], "dacy.load": [[2, "module-dacy.load"]], "download_model() (in module dacy.download)": [[2, "dacy.download.download_model"]], "get_latest_version() (in module dacy.download)": [[2, "dacy.download.get_latest_version"]], "install() (in module dacy.download)": [[2, "dacy.download.install"]], "load() (in module dacy.load)": [[2, "dacy.load.load"]], "models() (in module dacy.download)": [[2, "dacy.download.models"]], "models() (in module dacy.load)": [[2, "dacy.load.models"]], "where_is_my_dacy() (in module dacy.load)": [[2, "dacy.load.where_is_my_dacy"]], "dacy.score.input_length": [[3, "module-dacy.score.input_length"]], "dacy.score.score": [[3, "module-dacy.score.score"]], "n_sents_score() (in module dacy.score.input_length)": [[3, "dacy.score.input_length.n_sents_score"]], "no_misc_getter() (in module dacy.score.score)": [[3, "dacy.score.score.no_misc_getter"]], "score() (in module dacy.score.score)": [[3, "dacy.score.score.score"]]}})
\ No newline at end of file
+Search.setIndex({"docnames": ["adv_tutorials", "api/dacy.datasets", "api/dacy.download", "api/dacy.score", "faq", "index", "installation", "news", "performance", "performance.general", "performance.robustness", "performance_ner", "tutorials", "tutorials/basic", "tutorials/hate-speech", "tutorials/robustness", "tutorials/sentiment", "tutorials/textdescriptives"], "filenames": ["adv_tutorials.md", "api/dacy.datasets.rst", "api/dacy.download.rst", "api/dacy.score.rst", "faq.md", "index.md", "installation.md", "news.rst", "performance.rst", "performance.general.rst", "performance.robustness.rst", "performance_ner.ipynb", "tutorials.md", "tutorials/basic.ipynb", "tutorials/hate-speech.ipynb", "tutorials/robustness.ipynb", "tutorials/sentiment.ipynb", "tutorials/textdescriptives.ipynb"], "titles": ["Advanced Tutorials", "Datasets", "Downloading and Loading", "Score", "FAQ", "DaCy", "Installation", "News and Updates", "Performance", "State of the Art Comparison", "Robustness and Biases", "Named Entity Recognition", "Using DaCy", "Getting started", "Hate Speech", "Evaluating Robustness", "Sentiment Analysis", "Extracting Metrics from text using TextDescriptives"], "terms": {"daci": [0, 4, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16], "includ": [0, 1, 2, 3, 7, 9, 10, 11, 12, 13, 14, 15, 16], "seri": [0, 4, 7, 10, 11, 12, 16], "slightli": [0, 11, 15], "more": [0, 7, 10, 11, 12, 13, 15, 17], "These": [0, 2, 10, 11, 12, 14, 16], "ar": [0, 1, 2, 3, 7, 9, 10, 12, 13, 16, 17], "meant": [0, 11], "show": [0, 9, 10, 11, 13, 14, 15, 16, 17], "how": [0, 5, 10, 11, 12, 15, 16, 17], "us": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 13, 14, 15, 16], "task": [0, 7, 9, 10, 11, 13], "evalu": [0, 9, 10, 11], "integr": 0, "other": [0, 7, 9, 10, 11, 12, 13, 15, 17], "librari": [0, 4, 15, 16], "robust": [0, 5, 7, 8, 9], "instal": [0, 2, 4, 5, 7, 13, 16], "packag": [0, 2, 4, 5, 7, 13, 16, 17], "load": [0, 1, 3, 7, 13, 16, 17], "model": [0, 2, 3, 7, 9, 10, 12, 13, 17], "data": [0, 4, 7, 10, 11, 13], "estim": [0, 10, 11, 16], "perform": [0, 3, 5, 7, 10, 13, 14, 16, 17], "bias": [0, 5, 7, 8, 9, 16], "extract": [0, 3, 7, 11, 13], "metric": [0, 3, 7, 8, 11, 15], "from": [0, 1, 3, 4, 6, 7, 9, 11, 13, 15, 16], "text": [0, 2, 7, 10, 11, 13, 14, 16], "textdescript": [0, 7], "ad": [0, 7, 11, 15], "compon": [0, 3, 7, 13, 16], "exploratori": 0, "analysi": [0, 7, 10, 12, 14], "thi": [1, 3, 5, 7, 9, 10, 11, 13, 14, 15, 16, 17], "wrap": [1, 7, 12], "read": [1, 5, 9, 10, 11], "spaci": [1, 2, 3, 5, 7, 9, 11, 12, 13, 14, 15, 16, 17], "corpu": [1, 3, 11], "save_path": 1, "none": [1, 3, 13, 17], "split": [1, 3, 13, 15, 16], "train": [1, 3, 7, 10, 11, 12, 13, 14, 15, 16, 17], "dev": [1, 3], "test": [1, 3, 5, 7, 9, 10, 11, 13, 15, 17], "redownload": [1, 2], "fals": [1, 2, 13, 15, 16, 17], "n_sent": [1, 3], "1": [1, 2, 3, 7, 11, 13, 15, 16, 17], "open_unverified_connect": 1, "kwarg": [1, 2, 3], "sourc": [1, 2, 3, 7], "paramet": [1, 2, 3], "str": [1, 2, 3, 11], "option": [1, 2, 3, 9], "path": [1, 2], "If": [1, 2, 3, 4, 5, 9, 11, 12, 13, 16, 17], "doe": [1, 9, 11, 14, 16], "contain": [1, 3, 5, 8, 10, 11, 14, 16, 17], "i": [1, 2, 3, 5, 7, 8, 10, 11, 13, 14, 15, 16, 17], "download": [1, 7, 13, 15, 17], "folder": [1, 4], "default": [1, 2, 3, 7, 13, 15], "correspond": 1, "where_is_my_daci": [1, 2], "subfold": 1, "list": [1, 2, 3, 13, 15], "which": [1, 3, 4, 7, 9, 10, 11, 13, 15, 16], "should": [1, 2, 3, 7, 13], "return": [1, 2, 3, 15], "possibl": [1, 3, 16], "all": [1, 3, 4, 7, 9, 10, 11, 13, 17], "bool": [1, 2, 3], "int": [1, 3], "number": [1, 3, 11, 13], "sentenc": [1, 3, 12, 16, 17], "per": [1, 11, 13], "document": [1, 5, 7, 11, 13, 16, 17], "onli": [1, 7, 9, 10, 11, 13, 16, 17], "appli": [1, 3, 7, 9, 10, 11, 13, 14, 15, 16], "you": [1, 2, 3, 4, 5, 6, 7, 9, 12, 13, 14, 15, 16, 17], "an": [1, 3, 5, 7, 11, 13, 14, 15, 16], "unverifi": 1, "connect": 1, "force_extens": 1, "set": [1, 9, 10, 11, 14, 15, 16, 17], "extens": [1, 4, 7, 11, 14, 16], "doc": [1, 3, 4, 11, 13, 14, 16, 17], "regardless": [1, 2, 15], "whether": [1, 7, 14, 15, 16], "alreadi": [1, 2, 11, 13, 15, 17], "exist": [1, 7, 11, 13, 14, 16], "thereof": [1, 3], "type": [1, 2, 3, 11, 13, 14, 15, 16], "union": [1, 3], "exampl": [1, 2, 3, 13, 14, 15, 16], "import": [1, 2, 3, 9, 11, 13, 14, 15, 16, 17], "helper": 1, "function": [1, 2, 3, 7, 10, 13, 15], "dictionari": [1, 3, 12], "person": [1, 7, 10, 11, 13, 14], "augment": [1, 3, 4, 7, 15], "danish_nam": 1, "danish": [1, 3, 4, 5, 7, 9, 10, 11, 13, 14, 15, 16, 17], "A": [1, 2, 3, 4, 7, 9, 10, 11, 13, 14], "kei": [1, 15, 17], "first_nam": 1, "last_nam": 1, "The": [1, 2, 3, 4, 7, 9, 10, 11, 13, 14, 16, 17], "deriv": [1, 11, 13], "danmark": [1, 13], "statistik": 1, "2021": [1, 4, 10, 11], "dict": [1, 3], "female_nam": [1, 15], "femal": [1, 15], "load_nam": 1, "min_count": 1, "0": [1, 2, 3, 7, 9, 11, 13, 15, 16, 17], "ethnic": 1, "gender": 1, "min_prop_gend": 1, "lookup": 1, "tabl": [1, 9, 10, 11], "muslim": [1, 11], "meldgaard": 1, "2005": 1, "http": [1, 4, 6, 7, 13], "nor": 1, "ku": 1, "dk": 1, "publikation": 1, "webpublikation": 1, "muslimske_fornavn": 1, "minimum": 1, "occur": 1, "indic": [1, 2, 3, 9, 13], "male": [1, 11, 13], "float": [1, 15], "probabl": [1, 11, 13, 16], "being": [1, 2, 7, 16], "given": [1, 3, 9, 11, 13], "specif": [1, 4, 7, 9, 11], "base": [1, 7, 10, 11, 12, 13, 14], "proport": 1, "peopl": [1, 11, 13], "when": [1, 7, 9, 10, 11, 16], "male_nam": 1, "muslim_nam": 1, "method": [2, 11, 16], "them": [2, 13], "memori": [2, 13], "download_model": 2, "forc": 2, "specifi": [2, 3], "pipelin": [2, 3, 5, 7, 9, 10, 11, 13, 14, 15, 16, 17], "string": [2, 3], "get": [2, 5, 9, 12, 15, 16, 17], "present": 2, "locat": [2, 7, 11, 13], "da_dacy_medium_trf": [2, 11, 13], "get_latest_vers": 2, "latest": [2, 7, 13, 17], "version": [2, 3, 7, 11, 13, 17], "valid": [2, 3, 13, 17], "process": [2, 3, 4, 7, 9, 10, 11], "also": [2, 4, 5, 7, 9, 10, 11, 13, 14, 15, 16], "wish": [2, 3, 11, 15, 16], "To": [2, 4, 7, 11, 13, 14, 15, 16, 17], "see": [2, 10, 11, 13, 15, 16, 17], "avail": [2, 4, 7, 11, 13, 17], "even": [2, 10, 13], "ani": [2, 7, 11, 13, 15, 16, 17], "addit": 2, "argument": [2, 3, 15], "pass": [2, 3, 13, 17], "preprocess": [2, 5], "languag": [2, 3, 4, 5, 7, 9, 10, 11, 13, 16], "equival": [2, 16], "medium": [2, 7, 9, 11, 13], "verbos": [2, 3], "true": [2, 3, 11, 13, 15, 16, 17], "where": [2, 7, 9, 11, 13, 16, 17], "can": [2, 3, 4, 6, 7, 9, 10, 11, 13, 15, 16, 17], "configur": [2, 7], "environment": [2, 7], "variabl": [2, 7], "dacy_cache_dir": [2, 7], "toggl": [2, 3], "beta": [3, 7], "no_misc_gett": 3, "attr": 3, "util": [3, 4, 9, 11, 13, 17], "getter": 3, "entiti": [3, 5, 7, 8, 12, 15], "without": [3, 13], "misc": [3, 11, 13], "attribut": 3, "iter": 3, "span": [3, 13, 15, 16], "apply_fn": [3, 15], "score_fn": [3, 15], "token": [3, 9, 10, 11, 13, 15, 16], "po": [3, 9, 13, 15, 16], "ent": [3, 13, 15], "dep": 3, "k": [3, 4, 15], "nlp": [3, 4, 5, 7, 9, 10, 11, 13, 14, 15, 16, 17], "potenti": 3, "callabl": 3, "wrapper": [3, 15], "take": [3, 11, 16, 17], "output": [3, 14, 15, 16], "tag": [3, 5, 7, 8, 9, 11, 12, 15], "provid": [3, 15], "four": [3, 11], "potienti": 3, "measur": [3, 13, 17], "fine": [3, 7, 8, 12], "grain": [3, 7, 8, 12], "tag_acc": [3, 15], "coars": 3, "pos_acc": [3, 15], "depend": [3, 4, 5, 7, 8, 9, 12, 15, 16, 17], "pars": [3, 5, 7, 8, 9, 12, 15, 16], "style": [3, 4, 11, 13, 16], "time": [3, 10, 11, 13, 15], "run": [3, 4, 9, 11, 13, 14, 15, 16, 17], "empti": [3, 9, 13, 14, 16], "call": [3, 15, 17], "panda": [3, 15], "datafram": [3, 15, 17], "create_lower_casing_augment": [3, 15], "dataset": [3, 11, 15, 17], "dane": [3, 7, 8, 9, 10, 15], "da_dacy_small_tft": 3, "5": [3, 7, 11, 13, 15, 16, 17], "vari": 3, "input": [3, 9, 10], "length": 3, "n_sents_scor": 3, "form": [3, 13], "score_nam": 3, "defualt": 3, "come": [4, 13, 14, 16, 17], "suit": [4, 7], "implement": [4, 9, 16], "pytest": 4, "In": [4, 9, 10, 11, 13, 15, 17], "order": 4, "have": [4, 5, 7, 10, 11, 13, 14, 17], "clone": 4, "repositori": [4, 5], "requir": [4, 7, 10, 11, 13, 15], "defin": [4, 14, 15, 16], "extras_requir": 4, "section": [4, 9, 10], "pyproject": 4, "toml": 4, "git": [4, 6], "github": [4, 6, 7, 11, 13, 15], "com": [4, 6, 7], "centr": [4, 6, 7], "human": [4, 6, 7, 10, 16], "comput": [4, 6, 7, 11, 13, 17], "pip": [4, 6, 7, 15, 16], "e": [4, 7, 11, 13, 16], "python": [4, 13, 15, 17], "m": [4, 13, 15, 17], "desired_test": 4, "py": [4, 13, 15, 16, 17], "want": [4, 10, 11, 13, 14, 15, 16, 17], "check": [4, 7, 9, 13, 16], "coverag": 4, "follow": [4, 7, 9, 10, 11, 13, 16, 17], "cov": 4, "src": [4, 15], "sphinx": 4, "It": [4, 7, 11, 13, 14, 16], "furo": 4, "theme": 4, "custom": [4, 7, 13, 16, 17], "make": [4, 10, 11, 13, 17], "html": 4, "build": [4, 11, 13, 17], "b": 4, "_build": 4, "your": [4, 7, 11, 13, 14, 15, 16, 17], "research": [4, 16], "would": [4, 5, 11, 17], "much": [4, 11], "appreci": 4, "inproceed": 4, "f975f4ce65944e3ea958578003cee622": 4, "titl": [4, 11, 13], "unifi": [4, 9, 10, 11], "framework": [4, 5, 9, 10, 11, 13, 15], "booktitl": 4, "ceur": 4, "workshop": 4, "proceed": [4, 11], "author": 4, "enevoldsen": [4, 11], "kenneth": [4, 11], "hansen": [4, 11], "lass": [4, 11], "nielbo": [4, 11], "kristoff": [4, 11], "l": 4, "date": [4, 11, 13], "volum": 4, "2989": 4, "page": [4, 9, 10, 11, 13], "206": 4, "216": 4, "publish": 4, "issn": 4, "1613": 4, "0073": 4, "keyword": 4, "low": [4, 7], "resourc": [4, 11], "natur": [4, 5, 9, 11], "Or": 4, "prefer": 4, "apa": 4, "paper": [4, 7, 9, 10, 11, 15], "publicli": 4, "here": [4, 7, 11, 13, 16, 17], "preprint": [4, 7, 11], "made": [5, 11, 13], "Its": 5, "largest": [5, 13], "ha": [5, 7, 10, 13], "achiev": [5, 13], "state": [5, 7, 8, 12, 13, 14], "art": [5, 7, 8, 12, 13, 14], "name": [5, 7, 8, 12, 14, 15, 17], "recognit": [5, 7, 8, 12], "part": [5, 7, 8, 9, 11, 12, 15, 16], "speech": [5, 7, 8, 9, 12, 15], "feel": 5, "free": [5, 17], "try": [5, 7, 11, 16, 17], "out": [5, 7, 9, 10, 13, 15, 16], "demo": [5, 7], "materi": 5, "reproduc": [5, 7], "result": [5, 11, 15, 16], "guid": [5, 10, 12], "usag": [5, 9, 12], "furthermor": [5, 16], "behavior": [5, 15], "websit": 5, "well": [5, 7, 10, 11, 12, 13], "introduct": [5, 11], "start": [5, 7, 10, 12, 15, 16, 17], "its": [5, 7, 9, 10, 11, 13, 14, 16, 17], "variou": 5, "featur": 5, "instruct": 5, "tutori": [5, 7, 10, 12, 15, 17], "uisng": 5, "faq": 5, "frequent": 5, "ask": 5, "question": [5, 13], "answer": [5, 13], "bug": [5, 13], "report": [5, 9, 13], "found": [5, 17], "pleas": [5, 10, 11, 13, 17], "issu": [5, 7, 11], "request": 5, "idea": 5, "new": [5, 10, 11], "creat": [5, 11, 14, 16], "do": [5, 7, 10, 11, 13, 17], "about": [5, 13, 16], "discuss": 5, "forum": 5, "gener": [5, 7, 8, 10, 13], "For": [5, 9, 10, 11, 13, 17], "genral": 5, "dicuss": 5, "contribut": 5, "like": [5, 11, 13, 15, 17], "guidelin": 5, "via": 6, "pypi": [6, 7], "2": [7, 11, 13, 15, 16, 17], "7": [7, 11, 13, 17], "15": 7, "05": [7, 11], "23": [7, 11], "small": [7, 10, 11, 13, 15, 17], "larg": [7, 9, 10, 11, 13, 16], "support": [7, 13], "corefer": [7, 12], "resolut": [7, 12], "link": [7, 12], "ddt": [7, 10], "treebank": [7, 15], "trainabl": 7, "lemmat": [7, 12], "notabl": [7, 9, 10, 16, 17], "improv": [7, 13], "intersect": 7, "between": [7, 9, 13, 14, 16, 17], "cdt": 7, "so": [7, 11, 13, 17], "actual": [7, 17], "less": 7, "than": [7, 10, 11, 13], "befor": [7, 13], "annot": [7, 11, 13], "dacoref": 7, "obtain": [7, 9, 10, 13, 14, 15, 16], "morpholog": 7, "84": [7, 11], "91": [7, 11], "95": [7, 11], "89": [7, 11], "reduc": [7, 10, 13], "ner": [7, 9, 10, 11, 12, 15], "down": [7, 13], "87": [7, 11], "38": [7, 11, 15], "we": [7, 9, 10, 11, 12, 13, 14, 15, 16, 17], "recommend": [7, 9, 10, 11, 12, 16], "either": [7, 11, 13], "add_pip": [7, 11, 13, 14, 16, 17], "add": [7, 11, 13, 14, 16, 17], "sota": 7, "scandin": 7, "one": [7, 11, 14, 15, 16], "isn": [7, 9, 17], "t": [7, 9, 13, 14, 16, 17], "great": 7, "yet": [7, 13], "": [7, 9, 10, 11, 13, 14, 15, 16, 17], "precis": 7, "86": [7, 11], "recal": [7, 15], "still": [7, 10, 13, 15, 17], "due": [7, 13, 17], "lack": 7, "knowledg": [7, 11, 13], "consist": [7, 10, 11], "across": [7, 11, 13], "gain": 7, "f1": [7, 11], "81": [7, 11], "79": [7, 11], "85": [7, 11], "82": [7, 11, 13], "acc": 7, "94": [7, 11], "fix": 7, "varieti": [7, 11, 16, 17], "remov": [7, 11], "warn": [7, 13, 15, 16, 17], "annotat": 7, "error": [7, 11, 13, 15, 17], "wa": [7, 13, 17], "never": 7, "space": [7, 11], "need": [7, 13, 15, 16], "what": [7, 10, 11, 14, 16, 17], "next": [7, 17], "better": [7, 9, 10, 13, 15], "current": [7, 9, 13, 14, 17], "qid": [7, 13], "refer": [7, 8, 13, 16], "among": [7, 17], "thing": [7, 13, 17], "main": [7, 11], "examin": [7, 10, 11, 14, 16], "dansk": [7, 8, 10, 13], "6": [7, 11, 13], "10": [7, 13, 16, 17], "04": 7, "three": [7, 9, 11], "let": [7, 10, 14, 15, 16, 17], "up": [7, 16, 17], "18": [7, 13], "differ": [7, 11, 13, 16, 17], "config": [7, 13], "size": [7, 9, 11, 13], "now": [7, 13, 17], "4": [7, 11, 13, 15, 17], "3": [7, 11, 13, 15, 16, 17], "01": [7, 11, 13], "find": [7, 13, 15], "chang": [7, 10, 11, 15, 16], "g": [7, 11, 13, 16], "As": [7, 10, 11, 13, 15], "longer": 7, "allow": [7, 9, 10, 11, 13, 16, 17], "dan": [7, 11, 13], "nielsen": [7, 13], "27": [7, 11], "06": 7, "22": [7, 11, 15], "spandaur": 7, "hate": [7, 12], "detect": [7, 13, 16], "classif": [7, 14, 16], "move": [7, 10], "seper": 7, "versatil": 7, "thei": [7, 11, 13], "through": [7, 13, 15, 17], "extern": 7, "augmenti": [7, 11, 15], "rule": [7, 16], "sentiment": [7, 12], "instead": 7, "asent": [7, 16], "multipl": [7, 11, 15, 16], "thu": [7, 9, 11, 13, 16], "simplifi": 7, "avoid": 7, "emot": [7, 12, 14], "subject": [7, 12, 13], "polar": [7, 12], "classifc": 7, "bugfix": 7, "11": [7, 9], "21": 7, "danlp": [7, 9, 11, 14, 15, 16], "directli": [7, 11], "huggingfac": [7, 13], "hub": 7, "faster": [7, 16], "stabl": 7, "readabl": [7, 10, 17], "modul": [7, 9, 11], "develop": 7, "hlass": 7, "thank": 7, "pr": 7, "dhpullack": 7, "07": 7, "detail": [7, 10, 13, 17], "descript": [7, 11, 13], "got": 7, "brand": [7, 11, 16], "onlin": 7, "And": [7, 12], "prettier": 7, "print": [7, 11, 13, 14, 16], "09": 7, "releas": [7, 11, 13], "first": [7, 10, 11, 12, 13, 15, 16], "few": [7, 10, 15], "design": 7, "code": 7, "behaviour": 7, "both": [7, 11, 14, 15, 16], "beauti": 7, "hand": [7, 11], "drawn": 7, "logo": 7, "offici": 7, "aarhu": [7, 11, 13], "univers": [7, 17], "03": 7, "stunningli": 7, "look": [7, 11, 14, 16, 17], "site": [7, 13, 16, 17], "cover": 7, "99": [7, 11, 16], "codebas": 7, "major": 7, "oper": 7, "system": [7, 13], "just": [7, 13, 17], "linux": 7, "25": [7, 11], "senda": 7, "30": [7, 11], "effici": 7, "\u00e6l\u00e6ctra": [7, 14], "24": 7, "softwar": 7, "sequenc": 7, "introduc": [7, 10, 11], "abov": [7, 9, 10], "02": [7, 11, 13], "launch": 7, "evaul": 8, "comparison": [8, 10], "simpl": [8, 10, 17], "domain": [8, 14, 16], "conll": 8, "2003": 8, "format": [8, 15], "infer": [8, 9, 13], "speed": [8, 9, 13, 14, 17], "compar": [9, 11, 16], "repres": [9, 10, 11], "comprehens": 9, "below": [9, 11], "score": [9, 10, 11, 15, 16, 17], "best": [9, 10, 11, 13], "each": [9, 11, 13], "categori": [9, 11], "highlight": [9, 11], "bold": [9, 11], "second": [9, 11, 13, 15], "underlin": [9, 11], "cell": 9, "stanza": [9, 10], "v": 9, "batch": 9, "howev": [9, 10], "expect": [9, 10, 11], "reach": 9, "updat": [9, 10, 11, 13, 17], "unlabel": 9, "attach": 9, "denot": [9, 11], "percentag": [9, 11, 13], "word": [9, 11, 13, 16, 17], "assign": [9, 13], "correct": [9, 13], "head": [9, 13, 17], "while": [9, 10, 11, 13], "label": [9, 11, 13, 16, 17], "inform": [9, 13, 16, 17], "chapter": 9, "jurafski": 9, "martin": 9, "most": [9, 11, 13], "good": [9, 14], "altern": 9, "especi": [9, 16, 17], "cpu": [9, 17], "might": [9, 10, 11, 16], "consid": [9, 10], "interest": [9, 11, 16], "flair": [9, 10], "viabl": 9, "typic": [9, 11, 16], "benchmark": [9, 11], "tendenc": 9, "feed": [9, 11], "gold": [9, 11], "standard": [9, 11], "easier": [9, 11], "architectur": [9, 10, 11], "inflat": [9, 11], "further": [9, 11], "proberli": [9, 11], "reflect": [9, 11], "realli": [9, 11], "therefor": [9, 11], "own": [9, 10, 14, 17], "polyglot": [9, 11], "remain": 9, "conveni": 9, "give": 9, "quick": [9, 13, 17], "overview": [9, 10, 11, 12, 17], "over": [9, 11, 13], "landscap": 9, "continu": [9, 11, 16, 17], "badg": 9, "rank": 9, "note": [9, 11, 13], "strive": 9, "alwai": 9, "ideal": [9, 10, 11], "often": [9, 11, 13, 14], "too": 9, "strike": 9, "balanc": 9, "select": [9, 11], "candid": 9, "conduct": [10, 11], "thorough": [10, 11], "nuanc": [10, 11], "coupl": 10, "plausibl": 10, "peter": [10, 11], "schmeichel": [10, 11], "mener": [10, 11], "ogs\u00e5": [10, 11], "det": [10, 11, 13, 16], "landshold": [10, 11], "anno": [10, 11], "tilh\u00f8rer": [10, 11], "verdenstoppen": [10, 11], "og": [10, 11, 13], "kan": [10, 11, 13], "vind": [10, 11], "den": [10, 11, 13, 16], "kommend": [10, 11], "kamp": [10, 11], "mod": [10, 11], "england": [10, 11], "ann": [10, 11], "\u00f8stergaard": [10, 11], "ogs\u00e5l": 10, "dansi": 10, "landahold": 10, "2921": 10, "vefdrnstoppen": 10, "vond": 10, "kimmend": 10, "underli": [10, 11], "assumpt": [10, 11], "our": [10, 11, 17], "case": [10, 11, 13, 15, 16], "anna": 10, "verb": [10, 13], "context": [10, 11, 13, 16], "assum": [10, 13, 16], "wors": [10, 11, 16], "certain": 10, "minor": 10, "spell": [10, 11], "variat": [10, 11], "conclud": 10, "vulner": [10, 15], "instanc": [10, 11, 13], "hard": [10, 16], "replac": [10, 11, 15], "\u00e6": [10, 11], "\u00f8": [10, 11], "\u00e5": [10, 11], "ae": [10, 11], "oe": [10, 11], "aa": [10, 11], "histor": [10, 11], "seen": [10, 17], "limit": [10, 11, 13], "reason": [10, 11, 14], "comprehend": 10, "mejer": 10, "ogsp": 10, "landshoof": 10, "anbo": 10, "202q": 10, "tilh\u00e5rer": 10, "gerfenatop0en": 10, "lan": 10, "sen": 10, "kpmkendw": 10, "lamp": 10, "breakdown": 10, "some": [10, 11, 17], "trend": 10, "abbrevi": 10, "lead": [10, 13, 15], "degrad": [10, 13, 17], "larger": [10, 11, 13], "handl": 10, "bilstm": 10, "competit": 10, "under": [10, 11, 13], "outperform": [10, 13], "been": [10, 11, 13], "togeth": [10, 13, 17], "easili": [10, 13], "accuraci": [10, 11, 13], "long": 10, "had": 10, "around": 10, "97": [10, 11], "98": [10, 11], "argu": 10, "man": 10, "2011": 10, "greater": 10, "rather": [10, 13], "la": 10, "besid": 10, "googl": 10, "colab": 10, "yourself": 10, "compet": 11, "notebook": 11, "open": 11, "replic": 11, "hvingelbi": 11, "et": [11, 13, 16], "al": 11, "2020": [11, 13], "scheme": 11, "sang": 11, "de": 11, "meulder": 11, "organ": [11, 13], "miscellaneu": 11, "similar": [11, 16, 17], "ontonot": 11, "weischedel": 11, "2013": 11, "16": 11, "plank": 11, "nest": 11, "universitet": 11, "exam": 11, "known": [11, 13], "normal": [11, 13, 17], "prior": 11, "raw": 11, "fed": 11, "loc": [11, 13], "citi": [11, 13], "road": 11, "mountain": [11, 13], "public": 11, "commerci": 11, "place": 11, "meet": 11, "point": [11, 17], "abstract": 11, "fiction": [11, 13], "charact": [11, 15], "anim": 11, "alias": 11, "org": [11, 13], "summar": [11, 13], "sort": 11, "collect": [11, 13, 17], "rang": [11, 13], "compani": [11, 13], "polit": [11, 13, 16], "movement": 11, "government": 11, "bodi": [11, 13], "club": 11, "broad": 11, "event": [11, 13], "religion": [11, 14], "kendt": 11, "russisk": 11, "historiker": 11, "andronik": 11, "mirganjan": 11, "igor": 11, "klamkin": 11, "tror": [11, 16], "ikk": [11, 13, 16], "rusland": 11, "udvikl": 11, "uden": [11, 13], "en": [11, 13, 14, 16], "quot": 11, "jernn\u00e6v": 11, "confid": 11, "interv": 11, "calcul": [11, 15, 17], "bootstrap": 11, "500": [11, 17], "sampl": [11, 17], "averag": [11, 17], "da_dacy_large_trf": [11, 13], "88": 11, "9": [11, 13], "92": 11, "72": 11, "70": 11, "8": [11, 13, 15], "78": 11, "71": 11, "da_dacy_small_trf": [11, 13, 17], "75": 11, "69": 11, "68": 11, "saattrupdan": 11, "nbailab": 11, "scandi": 11, "83": 11, "93": 11, "80": 11, "73": 11, "alexandrainst": 11, "da": [11, 13, 14, 16], "66": [11, 15], "77": 11, "90": 11, "64": 11, "57": 11, "da_core_news_trf": 11, "61": 11, "da_core_news_lg": 11, "74": 11, "62": 11, "54": 11, "55": 11, "da_core_news_md": 11, "76": 11, "58": 11, "49": 11, "52": 11, "da_core_news_sm": [11, 15], "59": 11, "39": 11, "67": 11, "openai": 11, "gpt": 11, "turbo": 11, "50": 11, "41": 11, "47": 11, "63": 11, "65": 11, "worth": 11, "mention": 11, "similarli": [11, 16], "independ": 11, "strength": 11, "weak": 11, "multi": 11, "mani": 11, "those": [11, 15], "On": 11, "scandinavian": 11, "relev": 11, "strict": [11, 13, 17], "exclud": [11, 13], "gigaword": 11, "derczynski": 11, "wide": 11, "convers": [11, 13], "legal": 11, "social": [11, 16], "media": [11, 16], "web": 11, "content": 11, "wiki": 11, "book": [11, 13], "norp": [11, 13], "nation": [11, 13], "religi": [11, 13], "group": [11, 13, 14, 15], "facil": [11, 13], "airport": [11, 13], "highwai": [11, 13], "bridg": [11, 13], "etc": [11, 13], "agenc": [11, 13], "institut": [11, 13], "gpe": [11, 13], "countri": [11, 13], "non": [11, 13, 16], "water": [11, 13], "product": [11, 13, 16], "vehicl": [11, 13], "weapon": [11, 13], "food": [11, 13], "servic": [11, 13], "hurrican": [11, 13], "battl": [11, 13], "war": [11, 13], "sport": [11, 13], "work": [11, 13, 16], "OF": [11, 13], "song": [11, 13], "law": [11, 13], "concept": [11, 13], "absolut": [11, 13], "rel": [11, 13], "period": [11, 13], "smaller": [11, 13], "dai": [11, 13], "percent": [11, 13], "monei": [11, 13], "monetari": [11, 13], "valu": [11, 13, 17], "unit": [11, 13], "quantiti": [11, 13], "weight": [11, 13], "distanc": [11, 13, 17], "ordin": [11, 13], "cardin": [11, 13], "numer": [11, 13], "fall": [11, 13], "anoth": [11, 13], "opt": 11, "interact": 11, "chart": 11, "quickli": 11, "becom": [11, 16], "unruli": 11, "hover": 11, "dot": 11, "exact": [11, 15], "100": [11, 13, 17], "nbsp": 11, "43": 11, "56": 11, "46": 11, "36": 11, "60": 11, "51": 11, "45": 11, "13": 11, "37": 11, "53": [11, 15], "48": 11, "96": 11, "da_dacy_": 11, "_ner_fine_grain": 11, "convert": 11, "geo": 11, "were": 11, "divers": 11, "encapsul": 11, "adject": 11, "2004": 11, "world": [11, 13], "cup": [11, 17], "italian": 11, "ci": 11, "df": [11, 17], "dannet": 11, "ignor": 11, "notnul": 11, "round": 11, "astyp": 11, "lower": [11, 15], "upper": 11, "drop": [11, 15], "axi": 11, "inplac": 11, "filter": 11, "set_index": 11, "latex": 11, "styler": 11, "format_index": 11, "escap": 11, "to_latex": 11, "hrule": 11, "convert_css": 11, "begin": 11, "tabular": 11, "lllll": 11, "toprul": 11, "midrul": 11, "_daci": 11, "_larg": 11, "_trf": 11, "_medium": 11, "42": 11, "34": 11, "_small": 11, "35": 11, "_ner": 11, "_fine": 11, "_grain": 11, "_core": 11, "_new": 11, "44": 11, "_lg": 11, "32": 11, "_md": 11, "28": [11, 17], "_sm": 11, "29": 11, "26": 11, "17": 11, "bottomrul": 11, "end": 11, "approach": [11, 16], "initi": 11, "short": [11, 13], "repetit": 11, "baselin": 11, "intend": 11, "inconsist": 11, "ocr": 11, "keystrok": [11, 15], "close": 11, "keyboard": [11, 15], "swap": 11, "two": [11, 13, 14, 15, 16, 17], "neighbor": 11, "simul": 11, "randomli": 11, "capit": [11, 13], "lowercas": 11, "synonym": 11, "slight": 11, "grammat": [11, 13], "wordnet": 11, "respect": 11, "syntact": [11, 13], "role": [11, 13], "embed": 11, "tend": [11, 17], "appear": 11, "ascii": 11, "letter": 11, "uppercas": 11, "noun": [11, 12], "target": 11, "sometim": 11, "singl": [11, 13, 15, 16], "hun": 11, "l\u00e6ste": 11, "g\u00e5den": 11, "she": 11, "puzzl": 11, "l\u00f8ste": 11, "solv": 11, "quit": 11, "mean": [11, 13, 17], "degre": 11, "31": 11, "know": [11, 17], "why": 11, "choos": [11, 15, 16], "One": [11, 14, 16], "wp": 11, "appl": 11, "m1": 11, "pro": 11, "16gb": 11, "maco": 11, "high": [11, 17], "consum": 11, "laptop": 11, "higher": 11, "benefit": 11, "total": 11, "sec": 11, "1438": 11, "353": 11, "770": 11, "2024": 11, "da_dacy_large_ner_fine_grain": [11, 13], "567": 11, "da_dacy_medium_ner_fine_grain": [11, 13], "1670": 11, "00": [11, 13], "da_dacy_small_ner_fine_grain": [11, 13, 17], "5717": 11, "1618": 11, "19": 11, "1125": 11, "31364": 11, "32571": 11, "34624": 11, "da_core_news_": 11, "onc": [11, 13], "leon": 11, "manuel": 11, "r": 11, "ciosici": 11, "rebekah": 11, "baglini": 11, "morten": 11, "h": 11, "christiansen": 11, "jacob": 11, "aarup": 11, "dalsgaard": 11, "riccardo": 11, "fusaroli": 11, "juel": 11, "henrichsen": 11, "rasmu": 11, "andrea": 11, "kirked": 11, "alex": 11, "kjeldsen": 11, "23rd": 11, "nordic": 11, "confer": 11, "linguist": 11, "nodalida": 11, "413": [11, 16], "421": 11, "arxiv": 11, "2107": 11, "05295": 11, "amali": 11, "brogaard": 11, "pauli": 11, "maria": 11, "barrett": 11, "christina": 11, "rost": 11, "malm": 11, "lidegaard": 11, "ander": 11, "gaard": 11, "12th": 11, "4597": 11, "4604": 11, "barbara": 11, "kristian": 11, "n": 11, "rgaard": 11, "jensen": 11, "rob": 11, "van": 11, "der": [11, 16], "goot": 11, "lexic": 11, "2105": 11, "11301": 11, "erik": 11, "f": [11, 13, 16], "fien": 11, "share": 11, "c": [11, 17], "0306050": 11, "ralph": 11, "martha": 11, "palmer": 11, "mitchel": 11, "marcu": 11, "eduard": 11, "hovi": 11, "sameer": 11, "pradhan": 11, "lanc": 11, "ramshaw": 11, "nianwen": 11, "xue": 11, "taylor": 11, "jeff": 11, "kaufman": 11, "michel": 11, "franchini": 11, "ldc2013t19": 11, "consortium": 11, "philadelphia": 11, "pa": 11, "unfamiliar": 12, "segment": 12, "chunk": 12, "accec": 12, "built": 13, "same": [13, 17], "structur": 13, "familiar": [13, 17], "easi": [13, 15], "don": [13, 17], "worri": 13, "trf": 13, "increas": 13, "user": [13, 15], "becaus": [13, 17], "writeabl": 13, "co": 13, "chcaa": 13, "resolv": 13, "0eadea074d5f637e76357c46bbd56451471d0154": 13, "py3": 13, "whl": 13, "101": 13, "mb": 13, "25l": 13, "90m": 13, "0m": 13, "32m0": 13, "31m": 13, "eta": 13, "36m": 13, "2k": 13, "91m": 13, "32m2": 13, "31m85": 13, "36m0": 13, "32m9": 13, "31m138": 13, "32m16": 13, "31m198": 13, "32m23": 13, "31m196": 13, "32m29": 13, "31m193": 13, "32m36": 13, "31m190": 13, "32m43": 13, "31m194": 13, "32m49": 13, "31m189": 13, "32m56": 13, "32m62": 13, "31m182": 13, "32m69": 13, "31m183": 13, "32m75": 13, "32m81": 13, "31m177": 13, "32m88": 13, "31m185": 13, "32m95": 13, "32m101": 13, "31m197": 13, "31m37": 13, "25h": 13, "successfulli": 13, "home": [13, 16, 17], "runner": [13, 16, 17], "local": [13, 16, 17], "lib": [13, 16, 17], "python3": [13, 16, 17], "910": [13, 17], "userwarn": [13, 16, 17], "w095": [13, 17], "v3": [13, 17], "mai": [13, 17], "compat": [13, 17], "newer": [13, 17], "retrain": [13, 17], "warn_msg": [13, 17], "spacy_transform": [13, 17], "layer": [13, 17], "hf_shim": [13, 17], "137": [13, 17], "save": [13, 17], "torch": [13, 17], "state_dict": [13, 17], "transform": [13, 15, 17], "attempt": [13, 17], "fallback": [13, 17], "exactli": [13, 17], "convent": 13, "syntax": 13, "henc": 13, "abl": 13, "lot": 13, "veri": [13, 15], "written": 13, "pakken": 13, "er": [13, 16], "hurtig": 13, "effektiv": 13, "til": 13, "sprogprocess": 13, "identifi": 13, "real": 13, "object": [13, 14, 15, 16], "recogn": 13, "miscellan": 13, "label_": 13, "plot": [13, 17], "displaci": 13, "render": 13, "nil": 13, "q35": 13, "sinc": 13, "access": 13, "switch": 13, "simpli": [13, 14, 15, 16, 17], "blank": [13, 14, 16], "32m5": 13, "31m164": 13, "32m12": 13, "32m19": 13, "31m200": 13, "32m26": 13, "32m33": 13, "31m201": 13, "32m39": 13, "32m46": 13, "32m53": 13, "32m60": 13, "32m67": 13, "32m74": 13, "31m202": 13, "31m199": 13, "31m38": 13, "spacy_wrap": [13, 14, 16], "pipeline_component_tok_clf": 13, "tokenclassificationtransform": 13, "0x7fb54fd02560": 13, "denn": 13, "tr\u00e6net": 13, "af": 13, "fra": 13, "alexandra": 13, "instituttet": 13, "additon": 13, "slow": 13, "saw": 13, "uniqu": 13, "done": [13, 17], "u": [13, 15, 16, 17], "barack": 13, "obama": 13, "wikipedia": 13, "wikidata": 13, "disambigu": 13, "though": 13, "term": 13, "could": [13, 16, 17], "distinguish": 13, "fulli": 13, "expand": 13, "unknown": 13, "correspondig": 13, "rutechef": 13, "ivan": 13, "madsen": 13, "jeg": [13, 16], "ved": 13, "hvorfor": 13, "q830350": 13, "q16876242": 13, "famili": 13, "believ": 13, "incorrect": 13, "last": [13, 17], "full": 13, "slhave": 13, "neural": [13, 16], "match": 13, "specifc": 13, "combin": 13, "english": 13, "client": 13, "dron": 13, "bor": 13, "k\u00f8benhavn": 13, "31m150": 13, "32m11": 13, "31m181": 13, "32m17": 13, "31m187": 13, "32m24": 13, "32m30": 13, "31m188": 13, "32m37": 13, "31m186": 13, "32m50": 13, "31m192": 13, "32m57": 13, "31m191": 13, "32m63": 13, "32m70": 13, "32m76": 13, "31m175": 13, "32m82": 13, "32m89": 13, "kb_id_": 13, "wikidata_entri": 13, "q1748": 13, "northern": 13, "europ": 13, "nordeurop\u00e6isk": 13, "land": [13, 17], "denmark": 13, "hovedstad": 13, "imag": 13, "associ": 13, "articl": 13, "class": [13, 17], "fashion": 13, "31m154": 13, "32m10": 13, "32m15": 13, "32m21": 13, "31m157": 13, "31m161": 13, "32m32": 13, "31m160": 13, "32m48": 13, "31m159": 13, "32m54": 13, "32m59": 13, "32m64": 13, "31m156": 13, "31m155": 13, "32m86": 13, "32m91": 13, "32m97": 13, "31m158": 13, "31m36": 13, "43fedc5a1b1c1d193f461d13225f217f2ced507d": 13, "32m4": 13, "31m134": 13, "31m147": 13, "31m165": 13, "32m27": 13, "31m163": 13, "32m38": 13, "31m166": 13, "31m167": 13, "32m55": 13, "32m61": 13, "32m66": 13, "32m72": 13, "32m78": 13, "31m170": 13, "31m43": 13, "25hinstal": 13, "entityrecogn": 13, "0x7fb557f77f40": 13, "samt": 13, "andr": 13, "blev": 13, "d": 13, "mart": 13, "center": 13, "humant": 13, "kommun": 13, "after": 13, "statist": 13, "enabl": 13, "predict": [13, 14, 16], "produc": 13, "enough": 13, "pos_": 13, "cconj": 13, "num": 13, "pron": 13, "aux": 13, "adv": 13, "adj": 13, "adp": 13, "propn": 13, "phrase": [13, 16], "relat": [13, 15], "brown": 13, "fox": 13, "jump": 13, "lazi": 13, "dog": 13, "nsubj": 13, "nomin": 13, "fast": 13, "accur": 13, "parser": [13, 17], "tree": 13, "fritekst": 13, "cop": 13, "amod": 13, "nmod": 13, "separ": 13, "punctuat": 13, "s\u00e6tning": 13, "vigtig": 13, "del": 13, "bl": 13, "benytt": 13, "opdel": 13, "lang": 13, "tekster": 13, "mindr": 13, "bidder": 13, "mist": 13, "meningen": 13, "hvert": 13, "sent": 13, "flat": 13, "big": [13, 17], "yellow": 13, "taxi": 13, "pronoun": 13, "proper": 13, "nc": 13, "noun_chunk": 13, "inflect": 13, "analys": 13, "item": 13, "ran": 13, "machin": 13, "learn": 13, "normalis": 13, "tekst": 13, "v\u00e6re": 13, "god": [13, 14], "id\u00e9": 13, "lemma_": 13, "kunn": 13, "express": [13, 14], "chase": 13, "ball": 13, "shini": 13, "agent": 13, "chatbot": 13, "semant": 13, "represent": 13, "experiment": 13, "novemb": 13, "fik": 13, "minkavl": 13, "hen": 13, "christensen": 13, "hele": 13, "familien": 13, "chok": 13, "efter": 13, "pressem\u00f8d": 13, "han": 13, "vide": 13, "mink": 13, "skull": 13, "afliv": 13, "derm": 13, "fjernet": 13, "livsgrundlag": 13, "cluster": 13, "coref_clusters_1": 13, "encourag": 14, "violenc": 14, "toward": 14, "someth": 14, "race": 14, "sex": 14, "sexual": 14, "orient": 14, "tool": [14, 17], "incorper": 14, "wether": [14, 16], "laden": [14, 16], "classifi": [14, 16, 17], "creator": 14, "hatespeech_detect": 14, "facebook": 14, "offens": 14, "hatespeech_classif": 14, "s\u00e6rlig": 14, "opm\u00e6rksomh": 14, "personangreb": 14, "sprogbrug": 14, "spam": [14, 17], "indhold": 14, "bert": [14, 15, 16], "botxo": 14, "There": 14, "chosen": 14, "trade": 14, "off": 14, "ttack": 14, "electra": 14, "hatespeech": 14, "guscod": 14, "dkbert": 14, "pipeline_component_seq_clf": [14, 16], "sequenceclassificationtransform": [14, 16], "0x7f8581f7b400": 14, "wil": [14, 16], "is_offens": 14, "hate_speech_typ": 14, "emotion": [14, 16], "_prob": [14, 16], "suffix": [14, 16], "probabilit": [14, 16], "senil": 14, "gaml": 14, "idiot": 14, "hej": 14, "har": [14, 16], "du": 14, "haft": 14, "dag": 14, "pipe": [14, 16, 17], "_": [14, 16], "walk": 15, "evalut": 15, "ll": [15, 17], "lastli": 15, "spacy_smal": 15, "dacy_smal": 15, "straightforward": 15, "scorer": 15, "nice": 15, "spacy_baselin": 15, "dacy_baselin": 15, "wall_tim": 15, "ents_p": 15, "ents_r": 15, "ents_f": 15, "ents_per_type_loc_p": 15, "ents_per_type_loc_r": 15, "ents_per_type_loc_f": 15, "ents_per_type_misc_p": 15, "ents_per_type_misc_r": 15, "ents_per_type_misc_f": 15, "ents_per_type_per_f": 15, "ents_per_type_org_p": 15, "ents_per_type_org_r": 15, "ents_per_type_org_f": 15, "ents_excl_misc_ents_p": 15, "ents_excl_misc_ents_r": 15, "ents_excl_misc_ents_f": 15, "862225": 15, "685598": 15, "605735": 15, "643197": 15, "571429": 15, "666667": 15, "615385": 15, "628571": 15, "545455": 15, "584071": 15, "798898": 15, "677419": 15, "391304": 15, "496063": 15, "701031": 15, "622426": 15, "659394": 15, "947658": 15, "row": [15, 17], "column": [15, 17], "808233": 15, "82852": 15, "822581": 15, "82554": 15, "767241": 15, "927083": 15, "839623": 15, "764706": 15, "752066": 15, "758333": 15, "920904": 15, "720497": 15, "75817": 15, "845977": 15, "842105": 15, "844037": 15, "978324": 15, "978972": 15, "create_per_replace_augmenter_v1": 15, "lower_aug": 15, "level": 15, "female_name_dict": 15, "random": 15, "keep": 15, "force_pattern_s": 15, "pattern": 15, "firstnam": 15, "lastnam": 15, "female_aug": 15, "spacy_aug": 15, "dacy_aug": 15, "au561649": 15, "futurewarn": 15, "Not": 15, "prepend": 15, "index": 15, "futur": 15, "preserv": 15, "previou": 15, "groupbi": 15, "group_kei": 15, "adopt": 15, "silenc": 15, "lambda": 15, "x": [15, 17], "sum": 15, "pd": 15, "concat": 15, "873839": 15, "695652": 15, "286738": 15, "406091": 15, "687500": 15, "343750": 15, "458333": 15, "720000": 15, "446281": 15, "551020": 15, "412451": 15, "124224": 15, "209424": 15, "683871": 15, "242563": 15, "358108": 15, "922885": 15, "699737": 15, "828520": 15, "800000": 15, "758170": 15, "315962": 15, "607143": 15, "213262": 15, "31565": 15, "218750": 15, "330709": 15, "490566": 15, "429752": 15, "458150": 15, "245283": 15, "740741": 15, "212766": 15, "744444": 15, "153318": 15, "254269": 15, "933873": 15, "931722": 15, "710288": 15, "suffer": 15, "effect": 15, "stochast": 15, "create_keystroke_error_augmenter_v1": 15, "key_05_aug": 15, "da_qwerty_v1": 15, "spacy_kei": 15, "173135": 15, "096026": 15, "103943": 15, "099828": 15, "109890": 15, "104167": 15, "106952": 15, "060811": 15, "074380": 15, "066914": 15, "141732": 15, "073171": 15, "074534": 15, "073846": 15, "107456": 15, "112128": 15, "109742": 15, "326630": 15, "117777": 15, "116949": 15, "123656": 15, "120209": 15, "145631": 15, "156250": 15, "150754": 15, "073770": 15, "181818": 15, "066298": 15, "070175": 15, "128480": 15, "137300": 15, "132743": 15, "319308": 15, "094923": 15, "097603": 15, "102151": 15, "099825": 15, "060000": 15, "062500": 15, "061224": 15, "063830": 15, "068702": 15, "153439": 15, "089655": 15, "080745": 15, "084967": 15, "108352": 15, "109840": 15, "109091": 15, "321187": 15, "070100": 15, "123539": 15, "132616": 15, "127917": 15, "134831": 15, "125000": 15, "129730": 15, "080645": 15, "082645": 15, "081633": 15, "153465": 15, "129630": 15, "130435": 15, "130031": 15, "134737": 15, "146453": 15, "140351": 15, "313382": 15, "069810": 15, "099831": 15, "105735": 15, "102698": 15, "104762": 15, "114583": 15, "109453": 15, "033613": 15, "033058": 15, "033333": 15, "172973": 15, "067797": 15, "071006": 15, "116525": 15, "125858": 15, "121012": 15, "315617": 15, "manner": 15, "dacy_paper_repl": 15, "script": 15, "opinion": 16, "mine": 16, "determin": 16, "posit": 16, "neg": 16, "neutral": 16, "busi": 16, "monitor": 16, "feedback": 16, "rate": 16, "emploi": 16, "negat": 16, "postiv": 16, "counterpart": 16, "complex": [16, 17], "sarcasm": 16, "clear": 16, "consider": 16, "suitabl": 16, "europarl": 16, "twitter": 16, "No": 16, "gl\u00e6de": 16, "sindsro": 16, "tillid": 16, "accept": 16, "asent_da_v1": 16, "microblog": 16, "bertton": 16, "0x7f0246bbe9e0": 16, "subjectivity_prob": 16, "analysen": 16, "viser": 16, "\u00f8konomien": 16, "bliver": 16, "forf\u00e6rdelig": 16, "d\u00e5rlig": 16, "alligevel": 16, "godt": 16, "prob": 16, "arrai": 16, "dtype": [16, 17], "float32": 16, "positv": 16, "0x7f0244f77220": 16, "polarity_prob": 16, "002": 16, "008": 16, "981": 16, "019": 16, "happi": 16, "trust": 16, "forventn": 16, "interres": 16, "overasket": 16, "m\u00e5ll\u00f8": 16, "surpris": 16, "vrede": 16, "irrit": 16, "anger": 16, "foragt": 16, "modvilj": 16, "contempt": 16, "sorg": 16, "trist": 16, "sad": 16, "frygt": 16, "bekymret": 16, "fear": 16, "emotionally_laden": 16, "emotian": 16, "wrapped_model": 16, "143": 16, "0x7f0246108e20": 16, "ej": 16, "bil": 16, "s\u00e5\u00e5": 16, "flot": 16, "fuck": 16, "bare": 16, "s\u00e5": 16, "tr\u00e6l": 16, "tesla": 16, "landet": 16, "raket": 16, "p\u00e5": 16, "m\u00e5nen": 16, "vildt": 16, "tr\u00e6": 16, "haven": 16, "dependend": 16, "mega": 16, "glad": 16, "scale": 16, "neu": 16, "587": 16, "compound": 16, "5448": 16, "n_sentenc": [16, 17], "valenc": 16, "account": 16, "is_neg": 16, "516": 16, "visual": 16, "excel": 16, "intensifi": 16, "afinn": 16, "sentida": 16, "power": 17, "sm": 17, "5572": 17, "messag": 17, "categor": 17, "ham": 17, "estut": 17, "notic": 17, "hopefulli": 17, "inspir": 17, "bit": 17, "load_sms_data": 17, "go": 17, "until": 17, "jurong": 17, "crazi": 17, "ok": 17, "lar": 17, "joke": 17, "wif": 17, "oni": 17, "entri": 17, "wkly": 17, "comp": 17, "win": 17, "fa": 17, "fina": 17, "dun": 17, "sai": 17, "earli": 17, "hor": 17, "nah": 17, "think": 17, "he": 17, "goe": 17, "usf": 17, "live": 17, "aro": 17, "value_count": 17, "4825": 17, "747": 17, "count": 17, "int64": 17, "procedur": 17, "dependency_dist": 17, "dependencydist": 17, "0x7f778c323e20": 17, "whenev": 17, "subsampl": 17, "td": 17, "extract_df": 17, "include_text": 17, "join": 17, "origin": 17, "left": 17, "token_length_mean": 17, "token_length_median": 17, "token_length_std": 17, "sentence_length_mean": 17, "sentence_length_median": 17, "sentence_length_std": 17, "syllables_per_token_mean": 17, "syllables_per_token_median": 17, "smog": 17, "gunning_fog": 17, "automated_readability_index": 17, "coleman_liau_index": 17, "lix": 17, "rix": 17, "dependency_distance_mean": 17, "dependency_distance_std": 17, "prop_adjacent_dependency_relation_mean": 17, "prop_adjacent_dependency_relation_std": 17, "2987": 17, "grinder": 17, "nan": 17, "3274": 17, "hurri": 17, "butt": 17, "hang": 17, "ca": 17, "5158": 17, "karnan": 17, "car": 17, "wait": 17, "till": 17, "5477": 17, "todai": 17, "sundai": 17, "holidai": 17, "wo": 17, "2729": 17, "urgent": 17, "09066612661": 17, "That": 17, "With": 17, "sens": 17, "distribut": 17, "seaborn": 17, "sn": 17, "boxplot": 17, "y": 17, "ax": 17, "xlabel": 17, "ylabel": 17, "correl": 17, "strongli": 17, "encod": 17, "boolean": 17, "is_ham": 17, "metrics_correl": 17, "corrwith": 17, "sort_valu": 17, "ab": 17, "ascend": 17, "numpi": 17, "function_bas": 17, "2897": 17, "runtimewarn": 17, "invalid": 17, "encount": 17, "divid": 17, "stddev": 17, "2898": 17, "n_unique_token": 17, "226968": 17, "n_token": 17, "214254": 17, "213008": 17, "211721": 17, "194998": 17, "n_charact": 17, "185756": 17, "182463": 17, "167621": 17, "153314": 17, "133126": 17, "float64": 17, "pretti": 17, "shorter": 17, "simpler": 17, "kdeplot": 17, "hue": 17, "fill": 17, "densiti": 17, "cool": 17, "ve": 17, "step": 17}, "objects": {"dacy.datasets": [[1, 0, 0, "-", "dane"], [1, 0, 0, "-", "names"]], "dacy.datasets.dane": [[1, 1, 1, "", "dane"]], "dacy.datasets.names": [[1, 1, 1, "", "danish_names"], [1, 1, 1, "", "female_names"], [1, 1, 1, "", "load_names"], [1, 1, 1, "", "male_names"], [1, 1, 1, "", "muslim_names"]], "dacy": [[2, 0, 0, "-", "download"], [2, 0, 0, "-", "load"]], "dacy.download": [[2, 1, 1, "", "download_model"], [2, 1, 1, "", "get_latest_version"], [2, 1, 1, "", "install"], [2, 1, 1, "", "models"]], "dacy.load": [[2, 1, 1, "", "load"], [2, 1, 1, "", "models"], [2, 1, 1, "", "where_is_my_dacy"]], "dacy.score": [[3, 0, 0, "-", "input_length"], [3, 0, 0, "-", "score"]], "dacy.score.input_length": [[3, 1, 1, "", "n_sents_score"]], "dacy.score.score": [[3, 1, 1, "", "no_misc_getter"], [3, 1, 1, "", "score"]]}, "objtypes": {"0": "py:module", "1": "py:function"}, "objnames": {"0": ["py", "module", "Python module"], "1": ["py", "function", "Python function"]}, "titleterms": {"advanc": 0, "tutori": 0, "dataset": 1, "daci": [1, 2, 3, 5, 12, 17], "dane": [1, 11], "name": [1, 9, 10, 11, 13], "download": 2, "load": [2, 15], "score": 3, "input_length": 3, "faq": 4, "how": 4, "do": 4, "i": [4, 9], "test": 4, "code": 4, "document": 4, "gener": [4, 11], "cite": 4, "thi": 4, "work": 4, "instal": [6, 15], "new": 7, "updat": 7, "perform": [8, 9, 11, 15], "state": [9, 11], "art": [9, 11], "comparison": [9, 11], "want": 9, "see": 9, "more": [9, 16], "metric": [9, 17], "entiti": [9, 10, 11, 13], "recognit": [9, 10, 11, 13], "what": 9, "la": 9, "ua": 9, "measur": [9, 11], "robust": [10, 11, 15], "bias": [10, 11, 15], "exampl": [10, 11], "origin": [10, 11], "femal": [10, 11], "augment": [10, 11], "5": 10, "keystrok": 10, "error": 10, "15": 10, "keytyp": 10, "part": [10, 13], "speech": [10, 13, 14], "tag": [10, 13], "depend": [10, 13], "pars": [10, 13], "simpl": 11, "you": 11, "ar": 11, "miss": 11, "model": [11, 14, 15, 16], "dansk": 11, "fine": [11, 13], "grain": [11, 13], "domain": 11, "us": [11, 12, 17], "conll": 11, "2003": 11, "format": 11, "infer": 11, "speed": 11, "gpu": 11, "acceler": 11, "refer": 11, "get": 13, "start": 13, "link": 13, "beta": 13, "featur": 13, "ner": 13, "sentenc": 13, "segment": 13, "noun": 13, "chunk": 13, "lemmat": 13, "corefer": 13, "resolut": 13, "hate": 14, "other": [14, 16], "detect": 14, "usag": 14, "evalu": 15, "packag": 15, "data": [15, 17], "estim": 15, "sentiment": 16, "analysi": [16, 17], "overview": 16, "subject": 16, "polar": 16, "emot": 16, "dictionari": 16, "base": 16, "learn": 16, "resourc": 16, "extract": 17, "from": 17, "text": 17, "textdescript": 17, "ad": 17, "compon": 17, "exploratori": 17}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 8, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.viewcode": 1, "sphinxcontrib.bibtex": 9, "sphinx": 57}, "alltitles": {"Advanced Tutorials": [[0, "advanced-tutorials"]], "Datasets": [[1, "datasets"]], "dacy.datasets.dane": [[1, "module-dacy.datasets.dane"]], "dacy.datasets.names": [[1, "module-dacy.datasets.names"]], "Downloading and Loading": [[2, "downloading-and-loading"]], "dacy.download": [[2, "module-dacy.download"]], "dacy.load": [[2, "module-dacy.load"]], "Score": [[3, "score"]], "dacy.score.score": [[3, "dacy-score-score"]], "dacy.score.input_length": [[3, "dacy-score-input-length"]], "FAQ": [[4, "faq"]], "How do I test the code?": [[4, "how-do-i-test-the-code"]], "How is the documentation generated?": [[4, "how-is-the-documentation-generated"]], "How do I cite this work?": [[4, "how-do-i-cite-this-work"]], "DaCy": [[5, "dacy"]], "Installation": [[6, "installation"]], "News and Updates": [[7, "news-and-updates"]], "Performance": [[8, "performance"]], "State of the Art Comparison": [[9, "state-of-the-art-comparison"]], "Want to see more performance metrics for Named entity recognition?": [[9, null]], "What is LAS and UAS?": [[9, null]], "Measuring Performance": [[9, null], [11, null]], "State-of-the-Art": [[9, "state-of-the-art"]], "Robustness and Biases": [[10, "robustness-and-biases"]], "Example": [[10, null], [11, null]], "Original": [[10, null], [11, null]], "Female name augmentation": [[10, null], [11, null]], "5% keystroke errors": [[10, null]], "15% keytype errors": [[10, null]], "Named entity recognition": [[10, "named-entity-recognition"]], "Part-of-speech tagging": [[10, "part-of-speech-tagging"]], "Dependency parsing": [[10, "dependency-parsing"]], "Named Entity Recognition": [[11, "named-entity-recognition"], [13, "named-entity-recognition"]], "State-of-the-Art comparison": [[11, "state-of-the-art-comparison"]], "DaNE: Simple Named Entity Recognition": [[11, "dane-simple-named-entity-recognition"]], "You are missing a model": [[11, null]], "DANSK: Fine-grained Named Entity Recognition": [[11, "dansk-fine-grained-named-entity-recognition"]], "Domain Generalization": [[11, "domain-generalization"]], "Domain generalization using CoNLL-2003 format": [[11, "domain-generalization-using-conll-2003-format"]], "Biases": [[11, "biases"]], "Robustness": [[11, "robustness"]], "Inference Speed": [[11, "inference-speed"]], "GPU Acceleration": [[11, null]], "References": [[11, "references"]], "Using DaCy": [[12, "using-dacy"]], "Getting started": [[13, "getting-started"]], "Named Entity Linking": [[13, "named-entity-linking"]], "Beta feature": [[13, null], [13, null]], "Fine-grained NER": [[13, "fine-grained-ner"]], "Parts-of-speech Tagging": [[13, "parts-of-speech-tagging"]], "Dependency Parsing": [[13, "dependency-parsing"]], "Sentence Segmentation": [[13, "sentence-segmentation"]], "Noun Chunks": [[13, "noun-chunks"]], "Lemmatization": [[13, "lemmatization"]], "Coreference Resolution": [[13, "coreference-resolution"]], "Hate Speech": [[14, "hate-speech"]], "Other models for Hate Speech detection": [[14, null]], "Usage": [[14, "usage"]], "Evaluating Robustness": [[15, "evaluating-robustness"]], "Installing packages": [[15, "installing-packages"]], "Loading models and data": [[15, "loading-models-and-data"]], "Estimating performance": [[15, "estimating-performance"]], "Estimating robustness and biases": [[15, "estimating-robustness-and-biases"]], "Sentiment Analysis": [[16, "sentiment-analysis"]], "Overview of Sentiment Models": [[16, "overview-of-sentiment-models"]], "Subjectivity": [[16, "subjectivity"]], "Polarity": [[16, "polarity"]], "Emotion": [[16, "emotion"]], "Dictionary-Based Sentiment": [[16, "dictionary-based-sentiment"]], "Learn more": [[16, null]], "Other resources": [[16, null]], "Extracting Metrics from text using TextDescriptives": [[17, "extracting-metrics-from-text-using-textdescriptives"]], "Data": [[17, "data"]], "Adding TextDescriptives components to DaCy": [[17, "adding-textdescriptives-components-to-dacy"]], "Exploratory Data Analysis": [[17, "exploratory-data-analysis"]]}, "indexentries": {"dacy.datasets.dane": [[1, "module-dacy.datasets.dane"]], "dacy.datasets.names": [[1, "module-dacy.datasets.names"]], "dane() (in module dacy.datasets.dane)": [[1, "dacy.datasets.dane.dane"]], "danish_names() (in module dacy.datasets.names)": [[1, "dacy.datasets.names.danish_names"]], "female_names() (in module dacy.datasets.names)": [[1, "dacy.datasets.names.female_names"]], "load_names() (in module dacy.datasets.names)": [[1, "dacy.datasets.names.load_names"]], "male_names() (in module dacy.datasets.names)": [[1, "dacy.datasets.names.male_names"]], "module": [[1, "module-dacy.datasets.dane"], [1, "module-dacy.datasets.names"], [2, "module-dacy.download"], [2, "module-dacy.load"], [3, "module-dacy.score.input_length"], [3, "module-dacy.score.score"]], "muslim_names() (in module dacy.datasets.names)": [[1, "dacy.datasets.names.muslim_names"]], "dacy.download": [[2, "module-dacy.download"]], "dacy.load": [[2, "module-dacy.load"]], "download_model() (in module dacy.download)": [[2, "dacy.download.download_model"]], "get_latest_version() (in module dacy.download)": [[2, "dacy.download.get_latest_version"]], "install() (in module dacy.download)": [[2, "dacy.download.install"]], "load() (in module dacy.load)": [[2, "dacy.load.load"]], "models() (in module dacy.download)": [[2, "dacy.download.models"]], "models() (in module dacy.load)": [[2, "dacy.load.models"]], "where_is_my_dacy() (in module dacy.load)": [[2, "dacy.load.where_is_my_dacy"]], "dacy.score.input_length": [[3, "module-dacy.score.input_length"]], "dacy.score.score": [[3, "module-dacy.score.score"]], "n_sents_score() (in module dacy.score.input_length)": [[3, "dacy.score.input_length.n_sents_score"]], "no_misc_getter() (in module dacy.score.score)": [[3, "dacy.score.score.no_misc_getter"]], "score() (in module dacy.score.score)": [[3, "dacy.score.score.score"]]}})
\ No newline at end of file
diff --git a/tutorials/basic.html b/tutorials/basic.html
index 268aa897..3c70a147 100644
--- a/tutorials/basic.html
+++ b/tutorials/basic.html
@@ -323,35 +323,35 @@ 

Getting started
  Downloading https://huggingface.co/chcaa/da_dacy_small_trf/resolve/0eadea074d5f637e76357c46bbd56451471d0154/da_dacy_small_trf-any-py3-none-any.whl (101.3 MB)
 ?25l     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 0.0/101.3 MB ? eta -:--:--
-     ━╸━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 4.8/101.3 MB 145.2 MB/s eta 0:00:01
-     ━━━━╺━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 11.0/101.3 MB 178.9 MB/s eta 0:00:01
-     ━━━━━━╸━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 17.5/101.3 MB 185.3 MB/s eta 0:00:01
-     ━━━━━━━━╸━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 23.9/101.3 MB 188.2 MB/s eta 0:00:01
-     ━━━━━━━━━━━╺━━━━━━━━━━━━━━━━━━━━━━━━━━ 30.4/101.3 MB 188.5 MB/s eta 0:00:01
-
-

-
     ━━━━━━━━━━━━━╸━━━━━━━━━━━━━━━━━━━━━━━━ 36.9/101.3 MB 187.8 MB/s eta 0:00:01
-     ━━━━━━━━━━━━━━━━╺━━━━━━━━━━━━━━━━━━━━━ 43.2/101.3 MB 186.5 MB/s eta 0:00:01
-     ━━━━━━━━━━━━━━━━━━╸━━━━━━━━━━━━━━━━━━━ 49.7/101.3 MB 186.0 MB/s eta 0:00:01
-     ━━━━━━━━━━━━━━━━━━━━━╺━━━━━━━━━━━━━━━━ 56.1/101.3 MB 187.6 MB/s eta 0:00:01
-     ━━━━━━━━━━━━━━━━━━━━━━━╸━━━━━━━━━━━━━━ 62.7/101.3 MB 189.0 MB/s eta 0:00:01
-     ━━━━━━━━━━━━━━━━━━━━━━━━━╸━━━━━━━━━━━━ 69.1/101.3 MB 188.5 MB/s eta 0:00:01
-
-
-
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━╺━━━━━━━━━ 75.5/101.3 MB 186.5 MB/s eta 0:00:01
-     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸━━━━━━━ 82.0/101.3 MB 187.5 MB/s eta 0:00:01
-     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╺━━━━ 88.4/101.3 MB 187.1 MB/s eta 0:00:01
-     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸━━ 94.8/101.3 MB 187.4 MB/s eta 0:00:01
-     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 101.3/101.3 MB 196.6 MB/s eta 0:00:01
-     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 101.3/101.3 MB 196.6 MB/s eta 0:00:01
-
-
-
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 101.3/101.3 MB 196.6 MB/s eta 0:00:01
-     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 101.3/101.3 MB 196.6 MB/s eta 0:00:01
-     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 101.3/101.3 MB 196.6 MB/s eta 0:00:01
-     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 101.3/101.3 MB 196.6 MB/s eta 0:00:01
-     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 101.3/101.3 MB 196.6 MB/s eta 0:00:01
-     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 101.3/101.3 MB 37.9 MB/s eta 0:00:00
+     ━╺━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 2.9/101.3 MB 85.4 MB/s eta 0:00:02
+     ━━━╸━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 9.5/101.3 MB 138.8 MB/s eta 0:00:01
+     ━━━━━━╺━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 16.4/101.3 MB 198.5 MB/s eta 0:00:01
+     ━━━━━━━━╸━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 23.1/101.3 MB 196.0 MB/s eta 0:00:01
+     ━━━━━━━━━━━╺━━━━━━━━━━━━━━━━━━━━━━━━━━ 29.7/101.3 MB 193.1 MB/s eta 0:00:01
+
+
+
     ━━━━━━━━━━━━━╸━━━━━━━━━━━━━━━━━━━━━━━━ 36.3/101.3 MB 190.6 MB/s eta 0:00:01
+     ━━━━━━━━━━━━━━━━╺━━━━━━━━━━━━━━━━━━━━━ 43.1/101.3 MB 194.0 MB/s eta 0:00:01
+     ━━━━━━━━━━━━━━━━━━╸━━━━━━━━━━━━━━━━━━━ 49.5/101.3 MB 189.9 MB/s eta 0:00:01
+     ━━━━━━━━━━━━━━━━━━━━━╺━━━━━━━━━━━━━━━━ 56.1/101.3 MB 190.1 MB/s eta 0:00:01
+     ━━━━━━━━━━━━━━━━━━━━━━━╺━━━━━━━━━━━━━━ 62.2/101.3 MB 182.1 MB/s eta 0:00:01
+     ━━━━━━━━━━━━━━━━━━━━━━━━━╸━━━━━━━━━━━━ 69.0/101.3 MB 183.5 MB/s eta 0:00:01
+
+
+
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━╺━━━━━━━━━ 75.5/101.3 MB 190.6 MB/s eta 0:00:01
+     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸━━━━━━━ 81.7/101.3 MB 177.5 MB/s eta 0:00:01
+     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╺━━━━ 88.4/101.3 MB 185.0 MB/s eta 0:00:01
+     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸━━ 95.1/101.3 MB 190.8 MB/s eta 0:00:01
+     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 101.3/101.3 MB 197.4 MB/s eta 0:00:01
+     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 101.3/101.3 MB 197.4 MB/s eta 0:00:01
+
+
+
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 101.3/101.3 MB 197.4 MB/s eta 0:00:01
+     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 101.3/101.3 MB 197.4 MB/s eta 0:00:01
+     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 101.3/101.3 MB 197.4 MB/s eta 0:00:01
+     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 101.3/101.3 MB 197.4 MB/s eta 0:00:01
+     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 101.3/101.3 MB 197.4 MB/s eta 0:00:01
+     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 101.3/101.3 MB 37.7 MB/s eta 0:00:00
 ?25h
 
@@ -460,36 +460,34 @@

Named Entity Recognition
  Downloading https://huggingface.co/chcaa/da_dacy_small_trf/resolve/0eadea074d5f637e76357c46bbd56451471d0154/da_dacy_small_trf-any-py3-none-any.whl (101.3 MB)
 ?25l     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 0.0/101.3 MB ? eta -:--:--
-     ━╸━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 4.6/101.3 MB 139.3 MB/s eta 0:00:01
-     ━━━━╺━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 10.8/101.3 MB 175.1 MB/s eta 0:00:01
-     ━━━━━━╺━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 16.9/101.3 MB 178.9 MB/s eta 0:00:01
-     ━━━━━━━━╸━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 23.1/101.3 MB 179.7 MB/s eta 0:00:01
-     ━━━━━━━━━━╸━━━━━━━━━━━━━━━━━━━━━━━━━━━ 29.0/101.3 MB 175.1 MB/s eta 0:00:01
-
- -
     ━━━━━━━━━━━━━╺━━━━━━━━━━━━━━━━━━━━━━━━ 35.0/101.3 MB 171.8 MB/s eta 0:00:01
-     ━━━━━━━━━━━━━━━╺━━━━━━━━━━━━━━━━━━━━━━ 41.1/101.3 MB 176.8 MB/s eta 0:00:01
-     ━━━━━━━━━━━━━━━━━╸━━━━━━━━━━━━━━━━━━━━ 47.3/101.3 MB 178.9 MB/s eta 0:00:01
-     ━━━━━━━━━━━━━━━━━━━╸━━━━━━━━━━━━━━━━━━ 53.2/101.3 MB 175.8 MB/s eta 0:00:01
-     ━━━━━━━━━━━━━━━━━━━━━━╺━━━━━━━━━━━━━━━ 59.3/101.3 MB 173.2 MB/s eta 0:00:01
-     ━━━━━━━━━━━━━━━━━━━━━━━━╸━━━━━━━━━━━━━ 65.3/101.3 MB 176.0 MB/s eta 0:00:01
-
-
-
     ━━━━━━━━━━━━━━━━━━━━━━━━━━╸━━━━━━━━━━━ 71.5/101.3 MB 177.5 MB/s eta 0:00:01
-     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╺━━━━━━━━ 77.5/101.3 MB 177.2 MB/s eta 0:00:01
-     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╺━━━━━━ 83.6/101.3 MB 176.8 MB/s eta 0:00:01
-     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸━━━━ 89.7/101.3 MB 177.5 MB/s eta 0:00:01
-     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸━━ 95.9/101.3 MB 180.0 MB/s eta 0:00:01
-     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 101.3/101.3 MB 173.4 MB/s eta 0:00:01
-
-
-
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 101.3/101.3 MB 173.4 MB/s eta 0:00:01
-     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 101.3/101.3 MB 173.4 MB/s eta 0:00:01
-     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 101.3/101.3 MB 173.4 MB/s eta 0:00:01
-     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 101.3/101.3 MB 173.4 MB/s eta 0:00:01
-     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 101.3/101.3 MB 173.4 MB/s eta 0:00:01
-     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 101.3/101.3 MB 173.4 MB/s eta 0:00:01
-     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 101.3/101.3 MB 37.5 MB/s eta 0:00:00
+     ━━╺━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 5.5/101.3 MB 164.5 MB/s eta 0:00:01
+     ━━━━╸━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 12.3/101.3 MB 194.8 MB/s eta 0:00:01
+     ━━━━━━━╺━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 19.3/101.3 MB 200.5 MB/s eta 0:00:01
+     ━━━━━━━━━╸━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 26.2/101.3 MB 200.7 MB/s eta 0:00:01
+     ━━━━━━━━━━━━╺━━━━━━━━━━━━━━━━━━━━━━━━━ 33.1/101.3 MB 201.6 MB/s eta 0:00:01
+
+
+
     ━━━━━━━━━━━━━━╸━━━━━━━━━━━━━━━━━━━━━━━ 39.9/101.3 MB 197.2 MB/s eta 0:00:01
+     ━━━━━━━━━━━━━━━━━╸━━━━━━━━━━━━━━━━━━━━ 46.8/101.3 MB 197.3 MB/s eta 0:00:01
+     ━━━━━━━━━━━━━━━━━━━━╺━━━━━━━━━━━━━━━━━ 53.7/101.3 MB 200.6 MB/s eta 0:00:01
+     ━━━━━━━━━━━━━━━━━━━━━━╸━━━━━━━━━━━━━━━ 60.7/101.3 MB 201.1 MB/s eta 0:00:01
+     ━━━━━━━━━━━━━━━━━━━━━━━━━╺━━━━━━━━━━━━ 67.6/101.3 MB 201.7 MB/s eta 0:00:01
+     ━━━━━━━━━━━━━━━━━━━━━━━━━━━╸━━━━━━━━━━ 74.6/101.3 MB 202.3 MB/s eta 0:00:01
+
+
+
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸━━━━━━━ 81.4/101.3 MB 199.4 MB/s eta 0:00:01
+     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╺━━━━ 88.4/101.3 MB 197.5 MB/s eta 0:00:01
+     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸━━ 95.3/101.3 MB 201.0 MB/s eta 0:00:01
+     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 101.3/101.3 MB 202.4 MB/s eta 0:00:01
+     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 101.3/101.3 MB 202.4 MB/s eta 0:00:01
+     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 101.3/101.3 MB 202.4 MB/s eta 0:00:01
+
+
+
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 101.3/101.3 MB 202.4 MB/s eta 0:00:01
+     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 101.3/101.3 MB 202.4 MB/s eta 0:00:01
+     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 101.3/101.3 MB 202.4 MB/s eta 0:00:01
+     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 101.3/101.3 MB 202.4 MB/s eta 0:00:01
+     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 101.3/101.3 MB 38.3 MB/s eta 0:00:00
 ?25h
 
@@ -503,7 +501,7 @@

Named Entity Recognition{"version_major": 2, "version_minor": 0, "model_id": "a6636f33e879405494e9b5c410a09b2c"}
<spacy_wrap.pipeline_component_tok_clf.TokenClassificationTransformer at 0x7f64fd33d780>
+
<spacy_wrap.pipeline_component_tok_clf.TokenClassificationTransformer at 0x7fb54fd02560>
 
@@ -573,41 +571,34 @@

Named Entity Linking
  Downloading https://huggingface.co/chcaa/da_dacy_small_trf/resolve/0eadea074d5f637e76357c46bbd56451471d0154/da_dacy_small_trf-any-py3-none-any.whl (101.3 MB)
 ?25l     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 0.0/101.3 MB ? eta -:--:--
-     ━╺━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 3.8/101.3 MB 114.9 MB/s eta 0:00:01
-     ━━━╺━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 8.9/101.3 MB 131.0 MB/s eta 0:00:01
-     ━━━━━╺━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 14.1/101.3 MB 150.9 MB/s eta 0:00:01
-     ━━━━━━━╺━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 19.3/101.3 MB 151.3 MB/s eta 0:00:01
-     ━━━━━━━━╸━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 23.8/101.3 MB 138.4 MB/s eta 0:00:01
-
-
-
     ━━━━━━━━━━╸━━━━━━━━━━━━━━━━━━━━━━━━━━━ 28.2/101.3 MB 129.6 MB/s eta 0:00:01
-     ━━━━━━━━━━━━╸━━━━━━━━━━━━━━━━━━━━━━━━━ 33.5/101.3 MB 139.1 MB/s eta 0:00:01
-     ━━━━━━━━━━━━━━╸━━━━━━━━━━━━━━━━━━━━━━━ 39.0/101.3 MB 157.8 MB/s eta 0:00:01
-     ━━━━━━━━━━━━━━━━╸━━━━━━━━━━━━━━━━━━━━━ 44.3/101.3 MB 156.3 MB/s eta 0:00:01
-     ━━━━━━━━━━━━━━━━━━╸━━━━━━━━━━━━━━━━━━━ 49.7/101.3 MB 155.3 MB/s eta 0:00:01
-     ━━━━━━━━━━━━━━━━━━━━╸━━━━━━━━━━━━━━━━━ 55.0/101.3 MB 155.9 MB/s eta 0:00:01
-
-
-
     ━━━━━━━━━━━━━━━━━━━━━━╸━━━━━━━━━━━━━━━ 60.1/101.3 MB 150.9 MB/s eta 0:00:01
-     ━━━━━━━━━━━━━━━━━━━━━━━━╸━━━━━━━━━━━━━ 65.4/101.3 MB 151.9 MB/s eta 0:00:01
-     ━━━━━━━━━━━━━━━━━━━━━━━━━━╸━━━━━━━━━━━ 70.7/101.3 MB 156.5 MB/s eta 0:00:01
-     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸━━━━━━━━━ 76.1/101.3 MB 155.5 MB/s eta 0:00:01
-     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸━━━━━━━ 81.5/101.3 MB 156.5 MB/s eta 0:00:01
-     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸━━━━━ 86.9/101.3 MB 157.3 MB/s eta 0:00:01
-
-
-
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╺━━━ 91.6/101.3 MB 144.7 MB/s eta 0:00:01
-     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╺━ 97.0/101.3 MB 144.8 MB/s eta 0:00:01
-     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 101.3/101.3 MB 158.4 MB/s eta 0:00:01
-     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 101.3/101.3 MB 158.4 MB/s eta 0:00:01
-     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 101.3/101.3 MB 158.4 MB/s eta 0:00:01
-     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 101.3/101.3 MB 158.4 MB/s eta 0:00:01
-
-
-
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 101.3/101.3 MB 158.4 MB/s eta 0:00:01
-     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 101.3/101.3 MB 158.4 MB/s eta 0:00:01
-     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 101.3/101.3 MB 158.4 MB/s eta 0:00:01
-     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 101.3/101.3 MB 34.4 MB/s eta 0:00:00
+     ━╸━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 5.0/101.3 MB 150.1 MB/s eta 0:00:01
+     ━━━━╺━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 11.3/101.3 MB 181.0 MB/s eta 0:00:01
+     ━━━━━━╸━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 17.8/101.3 MB 187.1 MB/s eta 0:00:01
+     ━━━━━━━━━╺━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 24.4/101.3 MB 189.7 MB/s eta 0:00:01
+     ━━━━━━━━━━━╸━━━━━━━━━━━━━━━━━━━━━━━━━━ 30.9/101.3 MB 188.8 MB/s eta 0:00:01
+
+
+
     ━━━━━━━━━━━━━━╺━━━━━━━━━━━━━━━━━━━━━━━ 37.3/101.3 MB 186.9 MB/s eta 0:00:01
+     ━━━━━━━━━━━━━━━━╺━━━━━━━━━━━━━━━━━━━━━ 43.9/101.3 MB 187.7 MB/s eta 0:00:01
+     ━━━━━━━━━━━━━━━━━━╸━━━━━━━━━━━━━━━━━━━ 50.6/101.3 MB 192.2 MB/s eta 0:00:01
+     ━━━━━━━━━━━━━━━━━━━━━╺━━━━━━━━━━━━━━━━ 57.1/101.3 MB 191.7 MB/s eta 0:00:01
+     ━━━━━━━━━━━━━━━━━━━━━━━╸━━━━━━━━━━━━━━ 63.8/101.3 MB 192.8 MB/s eta 0:00:01
+     ━━━━━━━━━━━━━━━━━━━━━━━━━━╺━━━━━━━━━━━ 70.1/101.3 MB 186.6 MB/s eta 0:00:01
+
+
+
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸━━━━━━━━━ 76.2/101.3 MB 175.6 MB/s eta 0:00:01
+     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╺━━━━━━ 82.7/101.3 MB 186.9 MB/s eta 0:00:01
+     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸━━━━ 89.4/101.3 MB 192.0 MB/s eta 0:00:01
+     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸━━ 95.9/101.3 MB 191.8 MB/s eta 0:00:01
+     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 101.3/101.3 MB 192.8 MB/s eta 0:00:01
+     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 101.3/101.3 MB 192.8 MB/s eta 0:00:01
+
+
+
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 101.3/101.3 MB 192.8 MB/s eta 0:00:01
+     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 101.3/101.3 MB 192.8 MB/s eta 0:00:01
+     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 101.3/101.3 MB 192.8 MB/s eta 0:00:01
+     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 101.3/101.3 MB 192.8 MB/s eta 0:00:01
+     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 101.3/101.3 MB 38.7 MB/s eta 0:00:00
 ?25h
 
@@ -770,34 +761,39 @@

Fine-grained NER
  Downloading https://huggingface.co/chcaa/da_dacy_small_trf/resolve/0eadea074d5f637e76357c46bbd56451471d0154/da_dacy_small_trf-any-py3-none-any.whl (101.3 MB)
 ?25l     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 0.0/101.3 MB ? eta -:--:--
-     ━━╺━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 5.5/101.3 MB 164.1 MB/s eta 0:00:01
-     ━━━━╺━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 11.8/101.3 MB 185.2 MB/s eta 0:00:01
-     ━━━━━━╸━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 18.3/101.3 MB 186.1 MB/s eta 0:00:01
-     ━━━━━━━━━╺━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 24.8/101.3 MB 188.7 MB/s eta 0:00:01
-     ━━━━━━━━━━━╸━━━━━━━━━━━━━━━━━━━━━━━━━━ 31.2/101.3 MB 188.2 MB/s eta 0:00:01
+     ━╸━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 5.1/101.3 MB 154.2 MB/s eta 0:00:01
+     ━━━╸━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 10.3/101.3 MB 154.2 MB/s eta 0:00:01
+     ━━━━━╸━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 15.8/101.3 MB 154.6 MB/s eta 0:00:01
+     ━━━━━━━╸━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 21.1/101.3 MB 157.1 MB/s eta 0:00:01
+     ━━━━━━━━━━╺━━━━━━━━━━━━━━━━━━━━━━━━━━━ 26.7/101.3 MB 161.0 MB/s eta 0:00:01
+
+

+
     ━━━━━━━━━━━━╺━━━━━━━━━━━━━━━━━━━━━━━━━ 32.2/101.3 MB 160.0 MB/s eta 0:00:01
+     ━━━━━━━━━━━━━━╺━━━━━━━━━━━━━━━━━━━━━━━ 37.6/101.3 MB 157.7 MB/s eta 0:00:01
+     ━━━━━━━━━━━━━━━━╺━━━━━━━━━━━━━━━━━━━━━ 43.0/101.3 MB 157.4 MB/s eta 0:00:01
+     ━━━━━━━━━━━━━━━━━━╺━━━━━━━━━━━━━━━━━━━ 48.5/101.3 MB 159.0 MB/s eta 0:00:01
+     ━━━━━━━━━━━━━━━━━━━━╺━━━━━━━━━━━━━━━━━ 54.0/101.3 MB 160.1 MB/s eta 0:00:01
+     ━━━━━━━━━━━━━━━━━━━━━━╺━━━━━━━━━━━━━━━ 59.4/101.3 MB 159.1 MB/s eta 0:00:01
 
-
     ━━━━━━━━━━━━━━╺━━━━━━━━━━━━━━━━━━━━━━━ 37.4/101.3 MB 184.2 MB/s eta 0:00:01
-     ━━━━━━━━━━━━━━━━╺━━━━━━━━━━━━━━━━━━━━━ 43.9/101.3 MB 183.6 MB/s eta 0:00:01
-     ━━━━━━━━━━━━━━━━━━╸━━━━━━━━━━━━━━━━━━━ 50.4/101.3 MB 188.9 MB/s eta 0:00:01
-     ━━━━━━━━━━━━━━━━━━━━━╺━━━━━━━━━━━━━━━━ 56.9/101.3 MB 189.1 MB/s eta 0:00:01
-     ━━━━━━━━━━━━━━━━━━━━━━━╸━━━━━━━━━━━━━━ 63.4/101.3 MB 188.6 MB/s eta 0:00:01
-     ━━━━━━━━━━━━━━━━━━━━━━━━━━╺━━━━━━━━━━━ 69.9/101.3 MB 188.5 MB/s eta 0:00:01
+
     ━━━━━━━━━━━━━━━━━━━━━━━━╺━━━━━━━━━━━━━ 64.7/101.3 MB 156.9 MB/s eta 0:00:01
+     ━━━━━━━━━━━━━━━━━━━━━━━━━━╺━━━━━━━━━━━ 70.1/101.3 MB 155.7 MB/s eta 0:00:01
+     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━╺━━━━━━━━━ 75.6/101.3 MB 157.7 MB/s eta 0:00:01
+     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╺━━━━━━━ 81.0/101.3 MB 157.3 MB/s eta 0:00:01
+     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╺━━━━━ 86.3/101.3 MB 156.7 MB/s eta 0:00:01
+     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╺━━━ 91.7/101.3 MB 156.4 MB/s eta 0:00:01
 
-
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸━━━━━━━━━ 76.2/101.3 MB 184.9 MB/s eta 0:00:01
-     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╺━━━━━━ 82.7/101.3 MB 185.2 MB/s eta 0:00:01
-     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╺━━━━ 89.2/101.3 MB 188.4 MB/s eta 0:00:01
-     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸━━ 95.7/101.3 MB 187.5 MB/s eta 0:00:01
-     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 101.3/101.3 MB 191.5 MB/s eta 0:00:01
-     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 101.3/101.3 MB 191.5 MB/s eta 0:00:01
+
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╺━ 97.0/101.3 MB 155.3 MB/s eta 0:00:01
+     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 101.3/101.3 MB 158.1 MB/s eta 0:00:01
+     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 101.3/101.3 MB 158.1 MB/s eta 0:00:01
+     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 101.3/101.3 MB 158.1 MB/s eta 0:00:01
+     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 101.3/101.3 MB 158.1 MB/s eta 0:00:01
+     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 101.3/101.3 MB 158.1 MB/s eta 0:00:01
 
-
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 101.3/101.3 MB 191.5 MB/s eta 0:00:01
-     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 101.3/101.3 MB 191.5 MB/s eta 0:00:01
-     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 101.3/101.3 MB 191.5 MB/s eta 0:00:01
-     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 101.3/101.3 MB 191.5 MB/s eta 0:00:01
-     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 101.3/101.3 MB 38.1 MB/s eta 0:00:00
+
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 101.3/101.3 MB 158.1 MB/s eta 0:00:01
+     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 101.3/101.3 MB 36.3 MB/s eta 0:00:00
 ?25h
 
@@ -807,98 +803,32 @@

Fine-grained NER
  Downloading https://huggingface.co/chcaa/da_dacy_small_ner_fine_grained/resolve/43fedc5a1b1c1d193f461d13225f217f2ced507d/da_dacy_small_ner_fine_grained-any-py3-none-any.whl (82.7 MB)
 ?25l     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 0.0/82.7 MB ? eta -:--:--
-     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 0.1/82.7 MB 4.5 MB/s eta 0:00:19
-     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 0.5/82.7 MB 8.3 MB/s eta 0:00:10
-     ━╺━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 2.1/82.7 MB 20.8 MB/s eta 0:00:04
-     ━━╺━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 4.4/82.7 MB 32.0 MB/s eta 0:00:03
-     ━━━━╺━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 8.7/82.7 MB 51.5 MB/s eta 0:00:02
-
-

-
     ━━━━━╸━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 12.7/82.7 MB 117.9 MB/s eta 0:00:01
-     ━━━━━━━━╺━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 17.7/82.7 MB 125.0 MB/s eta 0:00:01
-     ━━━━━━━━━━╸━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 22.6/82.7 MB 139.2 MB/s eta 0:00:01
-     ━━━━━━━━━━━━━╺━━━━━━━━━━━━━━━━━━━━━━━━━ 28.3/82.7 MB 155.1 MB/s eta 0:00:01
-     ━━━━━━━━━━━━━━━╸━━━━━━━━━━━━━━━━━━━━━━━ 33.7/82.7 MB 163.8 MB/s eta 0:00:01
-     ━━━━━━━━━━━━━━━━━━╸━━━━━━━━━━━━━━━━━━━━ 40.0/82.7 MB 174.6 MB/s eta 0:00:01
-
-
-
     ━━━━━━━━━━━━━━━━━━━━━╸━━━━━━━━━━━━━━━━━ 45.8/82.7 MB 172.2 MB/s eta 0:00:01
-     ━━━━━━━━━━━━━━━━━━━━━━━━╺━━━━━━━━━━━━━━ 50.9/82.7 MB 161.7 MB/s eta 0:00:01
-     ━━━━━━━━━━━━━━━━━━━━━━━━━━╸━━━━━━━━━━━━ 56.9/82.7 MB 166.0 MB/s eta 0:00:01
-     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸━━━━━━━━━ 62.7/82.7 MB 171.5 MB/s eta 0:00:01
-     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╺━━━━━━ 68.6/82.7 MB 170.9 MB/s eta 0:00:01
-     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╺━━━ 74.6/82.7 MB 172.3 MB/s eta 0:00:01
-
-
-
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╺ 80.8/82.7 MB 178.5 MB/s eta 0:00:01
-     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 82.7/82.7 MB 179.7 MB/s eta 0:00:01
-     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 82.7/82.7 MB 179.7 MB/s eta 0:00:01
-     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 82.7/82.7 MB 179.7 MB/s eta 0:00:01
-     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 82.7/82.7 MB 179.7 MB/s eta 0:00:01
-     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 82.7/82.7 MB 179.7 MB/s eta 0:00:01
-
-
-
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 82.7/82.7 MB 179.7 MB/s eta 0:00:01
-     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 82.7/82.7 MB 179.7 MB/s eta 0:00:01
-     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 82.7/82.7 MB 179.7 MB/s eta 0:00:01
-     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 82.7/82.7 MB 179.7 MB/s eta 0:00:01
-     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 82.7/82.7 MB 179.7 MB/s eta 0:00:01
-     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 82.7/82.7 MB 179.7 MB/s eta 0:00:01
-
-
-
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 82.7/82.7 MB 179.7 MB/s eta 0:00:01
-     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 82.7/82.7 MB 179.7 MB/s eta 0:00:01
-     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 82.7/82.7 MB 179.7 MB/s eta 0:00:01
-     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 82.7/82.7 MB 179.7 MB/s eta 0:00:01
-     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 82.7/82.7 MB 179.7 MB/s eta 0:00:01
-     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 82.7/82.7 MB 179.7 MB/s eta 0:00:01
-
-
-
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 82.7/82.7 MB 179.7 MB/s eta 0:00:01
-     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 82.7/82.7 MB 179.7 MB/s eta 0:00:01
-     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 82.7/82.7 MB 179.7 MB/s eta 0:00:01
-     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 82.7/82.7 MB 179.7 MB/s eta 0:00:01
-     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 82.7/82.7 MB 179.7 MB/s eta 0:00:01
-     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 82.7/82.7 MB 179.7 MB/s eta 0:00:01
-
-
-
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 82.7/82.7 MB 179.7 MB/s eta 0:00:01
-     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 82.7/82.7 MB 179.7 MB/s eta 0:00:01
-     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 82.7/82.7 MB 179.7 MB/s eta 0:00:01
-     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 82.7/82.7 MB 179.7 MB/s eta 0:00:01
-     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 82.7/82.7 MB 179.7 MB/s eta 0:00:01
-     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 82.7/82.7 MB 179.7 MB/s eta 0:00:01
-
-
-
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 82.7/82.7 MB 179.7 MB/s eta 0:00:01
-     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 82.7/82.7 MB 179.7 MB/s eta 0:00:01
-     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 82.7/82.7 MB 179.7 MB/s eta 0:00:01
-     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 82.7/82.7 MB 179.7 MB/s eta 0:00:01
-     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 82.7/82.7 MB 179.7 MB/s eta 0:00:01
-     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 82.7/82.7 MB 179.7 MB/s eta 0:00:01
+     ━━╺━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 4.5/82.7 MB 134.4 MB/s eta 0:00:01
+     ━━━━╸━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 10.1/82.7 MB 147.9 MB/s eta 0:00:01
+     ━━━━━━━╺━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 15.8/82.7 MB 165.6 MB/s eta 0:00:01
+     ━━━━━━━━━━╺━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 21.4/82.7 MB 165.9 MB/s eta 0:00:01
+     ━━━━━━━━━━━━╸━━━━━━━━━━━━━━━━━━━━━━━━━━ 27.0/82.7 MB 163.4 MB/s eta 0:00:01
 
-
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 82.7/82.7 MB 179.7 MB/s eta 0:00:01
-     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 82.7/82.7 MB 179.7 MB/s eta 0:00:01
-     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 82.7/82.7 MB 179.7 MB/s eta 0:00:01
-     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 82.7/82.7 MB 179.7 MB/s eta 0:00:01
-     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 82.7/82.7 MB 179.7 MB/s eta 0:00:01
-     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 82.7/82.7 MB 179.7 MB/s eta 0:00:01
+
     ━━━━━━━━━━━━━━━╺━━━━━━━━━━━━━━━━━━━━━━━ 32.5/82.7 MB 161.5 MB/s eta 0:00:01
+     ━━━━━━━━━━━━━━━━━╸━━━━━━━━━━━━━━━━━━━━━ 38.1/82.7 MB 160.5 MB/s eta 0:00:01
+     ━━━━━━━━━━━━━━━━━━━━╸━━━━━━━━━━━━━━━━━━ 43.9/82.7 MB 166.0 MB/s eta 0:00:01
+     ━━━━━━━━━━━━━━━━━━━━━━━╺━━━━━━━━━━━━━━━ 49.6/82.7 MB 167.6 MB/s eta 0:00:01
+     ━━━━━━━━━━━━━━━━━━━━━━━━━━╺━━━━━━━━━━━━ 55.4/82.7 MB 167.5 MB/s eta 0:00:01
+     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸━━━━━━━━━━ 61.0/82.7 MB 165.6 MB/s eta 0:00:01
 
-
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 82.7/82.7 MB 179.7 MB/s eta 0:00:01
-     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 82.7/82.7 MB 179.7 MB/s eta 0:00:01
-     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 82.7/82.7 MB 179.7 MB/s eta 0:00:01
-     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 82.7/82.7 MB 179.7 MB/s eta 0:00:01
-     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 82.7/82.7 MB 179.7 MB/s eta 0:00:01
-     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 82.7/82.7 MB 179.7 MB/s eta 0:00:01
+
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╺━━━━━━━ 66.6/82.7 MB 163.8 MB/s eta 0:00:01
+     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╺━━━━ 72.3/82.7 MB 164.8 MB/s eta 0:00:01
+     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸━━ 78.0/82.7 MB 166.9 MB/s eta 0:00:01
+     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 82.7/82.7 MB 170.7 MB/s eta 0:00:01
+     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 82.7/82.7 MB 170.7 MB/s eta 0:00:01
+     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 82.7/82.7 MB 170.7 MB/s eta 0:00:01
 
-
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 82.7/82.7 MB 179.7 MB/s eta 0:00:01
-     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 82.7/82.7 MB 179.7 MB/s eta 0:00:01
-     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 82.7/82.7 MB 179.7 MB/s eta 0:00:01
-     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 82.7/82.7 MB 179.7 MB/s eta 0:00:01
-     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 82.7/82.7 MB 5.5 MB/s eta 0:00:00
+
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 82.7/82.7 MB 170.7 MB/s eta 0:00:01
+     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 82.7/82.7 MB 170.7 MB/s eta 0:00:01
+     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 82.7/82.7 MB 43.9 MB/s eta 0:00:00
 ?25hInstalling collected packages: da-dacy-small-ner-fine-grained
 
@@ -909,7 +839,7 @@

Fine-grained NER
<spacy.pipeline.ner.EntityRecognizer at 0x7f64fc90a8f0>
+
<spacy.pipeline.ner.EntityRecognizer at 0x7fb557f77f40>
 
@@ -1011,7 +941,7 @@

Dependency Parsing -
+
DaCy PROPN @@ -1053,57 +983,57 @@

Dependency Parsing - + - nsubj + nsubj - + - cop + cop - + - det + det - + - amod + amod - + - case + case - + - amod + amod - + - nmod + nmod @@ -1274,7 +1204,7 @@

Coreference Resolution - + diff --git a/tutorials/hate-speech.html b/tutorials/hate-speech.html index bc7e63da..7f44ed04 100644 --- a/tutorials/hate-speech.html +++ b/tutorials/hate-speech.html @@ -336,7 +336,7 @@

Usage# Hide code cell output
-
<spacy_wrap.pipeline_component_seq_clf.SequenceClassificationTransformer at 0x7f7c57d48220>
+
<spacy_wrap.pipeline_component_seq_clf.SequenceClassificationTransformer at 0x7f8581f7b400>
 
@@ -459,7 +459,7 @@

Usage# - + diff --git a/tutorials/sentiment.html b/tutorials/sentiment.html index 9ca76460..5016c55b 100644 --- a/tutorials/sentiment.html +++ b/tutorials/sentiment.html @@ -345,7 +345,7 @@

SubjectivityHide code cell output
-
<spacy_wrap.pipeline_component_seq_clf.SequenceClassificationTransformer at 0x7feab17aa980>
+
<spacy_wrap.pipeline_component_seq_clf.SequenceClassificationTransformer at 0x7f0246bbe9e0>
 
@@ -400,7 +400,7 @@

PolarityHide code cell output
-
<spacy_wrap.pipeline_component_seq_clf.SequenceClassificationTransformer at 0x7feab17a9360>
+
<spacy_wrap.pipeline_component_seq_clf.SequenceClassificationTransformer at 0x7f0244f77220>
 
@@ -467,11 +467,11 @@

EmotionHide code cell output
-
/home/runner/.local/lib/python3.10/site-packages/dacy/sentiment/wrapped_models.py:143: UserWarning: The 'emotion' component assumes the 'emotionally_laden' extension is set. To set it you can run  nlp.add_pipe('dacy/emotionally_laden')
+
/home/runner/.local/lib/python3.10/site-packages/dacy/sentiment/wrapped_models.py:143: UserWarning: The 'emotion' component assumes the 'emotionally_laden' extension is set. To set it you can run  nlp.add_pipe('dacy/emotionally_laden')
   warn(
 
-
<spacy_wrap.pipeline_component_seq_clf.SequenceClassificationTransformer at 0x7feab1424700>
+
<spacy_wrap.pipeline_component_seq_clf.SequenceClassificationTransformer at 0x7f0246108e20>
 
@@ -633,7 +633,7 @@

Dictionary-Based Sentiment -
+
jeg 0.0 @@ -665,17 +665,17 @@

Dictionary-Based Sentiment - + - intensified by + intensified by - + - negated by + negated by @@ -780,7 +780,7 @@

Dictionary-Based Sentiment - + diff --git a/tutorials/textdescriptives.html b/tutorials/textdescriptives.html index a7f88a97..c6fc295c 100644 --- a/tutorials/textdescriptives.html +++ b/tutorials/textdescriptives.html @@ -392,7 +392,7 @@

Adding TextDescriptives components to DaCy
<textdescriptives.components.dependency_distance.DependencyDistance at 0x7f695404b9d0>
+
<textdescriptives.components.dependency_distance.DependencyDistance at 0x7f778c323e20>
 
@@ -449,21 +449,21 @@

Adding TextDescriptives components to DaCy
<Axes: xlabel='label', ylabel='lix'>
 

-../_images/38d1469220ed84d85d484b6f8f1a356337fa74308cc504285292a59f305b2db8.png +../_images/102e78040ff456694f0069c23d106300b6047f1c7b9b0a212eb5aecf969dd07b.png

Let’s run a quick test to see if any of our metrics correlate strongly with the label

@@ -630,16 +630,22 @@

Exploratory Data Analysis -

We can do a similar thing for the lix score, where we see that here isn’t a big difference between the two classes:

@@ -671,7 +677,7 @@

Exploratory Data Analysis
<Axes: xlabel='lix', ylabel='Density'>
 

-../_images/80d8f5ea4364e618ca9620a3b1cbb525c23ad4a3bd80652d02453eca44b61fe3.png +../_images/43c8357fc985747deaaaa078aee54fc4146152e054fbd8b7265f9fed7be2a9da.png

Cool! We’ve now done a quick analysis of the SMS dataset and found some differences in the distributions of some readability and dependency-distance metrics between the actual SMS’s and spam.