Surkal · Surkal · Jan 8, 2022 · Jan 8, 2022 · Jan 8, 2022 · Jan 9, 2022
diff --git a/requirements-dev.txt b/requirements-dev.txt
@@ -0,0 +1,5 @@
+pytest==6.1.1
+wikitextparser==0.47.0
+coverage==5.3
+requests==2.24.0
+pyquery==1.4.1
diff --git a/requirements.txt b/requirements.txt
@@ -1,5 +1,3 @@
-pytest==6.1.1
 wikitextparser==0.47.0
-coverage==5.3
 requests==2.24.0
 pyquery==1.4.1
diff --git a/wiktionnaireparser/parser.py b/wiktionnaireparser/parser.py
@@ -98,14 +98,18 @@ def _find_lang_sections_id(self):
         self.sections_id = {}
         for section in lang.getnext().getchildren():  # 'li'
             section_id = section.find('a').attrib['href']
+            # Skip entries whose href contains '*' or '_'.
+            # NOTE(review): this also drops ids like 'Voir_aussi' - confirm.
+            if '*' in section_id or '_' in section_id:
+                continue
             # Subsections?
             if section.find('ul') is None:
                 self.sections_id[section_id] = []
                 continue
             subsections = []
             for subsection in section.find('ul'):
                 subsections.append(subsection.find('a').attrib['href'])
             self.sections_id[section_id] = subsections
 
         return self.sections_id
 
@@ -132,7 +133,7 @@ def get_parts_of_speech(self):
         parts_of_speech = {}
         useless_sections = (
             r'Étymologie', r'Prononciation', r'Références', r'Voir_aussi',
-            r'Anagrammes', r'Liens_externes'
+            r'Anagrammes', r'Liens_externes', r'Erreurs*', r'=_Synonymes'
         )
         sections = filter_sections_id(self.sections_id.keys(), useless_sections)
         for section_name in sections:
@@ -195,22 +196,27 @@ def get_definitions(self, part_of_speech):
             part_of_speech = '#' + part_of_speech.replace(' ', '_')
         text = self._query.find(part_of_speech)[0]
         text = text.getparent()
-        while text.tag != 'ol':
+        # Advance through the following siblings up to the <ol> element;
+        # stop when the siblings run out instead of failing on None.
+        while text is not None and text.tag != 'ol':
-            # ligne de forme
+            # form line ("ligne de forme")
             if text.tag in ('p', 'span'):
                 self.ligne_de_forme(text)
             text = text.getnext()
+        if text is None:
+            # No <ol> was found: there are no definitions to collect.
+            return definitions
         for i, definition_bloc in enumerate(text.getchildren()):
             raw = definition_bloc.text_content()
             definition = raw.split('\n')[0]
             # Catching examples
             examples = get_examples(definition_bloc)
             definitions[i] = {'definition': definition}
             if examples:
                 definitions[i]['examples'] = examples
             if definition_bloc.find('ol') is not None:
                 subdefinitions = get_subdefinitions(definition_bloc.find('ol'))
                 definitions[i]['subdefinitions'] = subdefinitions
         return definitions
 
     def get_etymology(self):
@@ -274,11 +276,16 @@ def get_translations(self, translation_id):
         lines = section.getnext().cssselect('li')
 
         for line in lines:
+            label = line.find('span')
+            if label is None:
+                # No language label in this item, so there is no key to
+                # store its translations under: skip it.
+                continue
             translations = []
-            language = line.find('span').text_content()
+            language = label.text_content()
             links = line.cssselect('bdi a')
 
             for link in links:
                 translations.append(link.text_content())
             result[language] = translations
         return result

diff --git a/wiktionnaireparser/utils.py b/wiktionnaireparser/utils.py
@@ -4,6 +4,8 @@
 import json
 from contextlib import suppress
 
+from lxml.html import HtmlComment
+
 
 def etymology_cleaner(etymology):
     """
@@ -34,25 +36,33 @@ def extract_related_words(section):
     """Extract related words."""
     related = {}
     count = 0
-    while section.tag not in ('h3', 'h4'):
+    # Walk the siblings until the next h3/h4 heading or the end of the chain.
+    while section is not None and section.tag not in ('h3', 'h4'):
+        if isinstance(section, HtmlComment):
+            # Comment nodes are skipped, but still counted so that the keys
+            # of `related` keep matching the sibling positions.
+            section = section.getnext()
+            count += 1
+            continue
         words = []
         description = ''
         if section.cssselect('.NavContent'):
             with suppress(IndexError):
                 description = section.cssselect('.NavHead')[0].text_content()
             for link in section.cssselect('.NavContent a'):
-                if 'Annexe:' in link.attrib.get('href'):
+                # Links without an href would make the membership test fail.
+                if 'Annexe:' in link.attrib.get('href', ''):
                     continue
                 words.append(link.text_content())
 
         else:
             for link in section.cssselect('a'):
-                if 'Annexe:' in link.attrib.get('href'):
+                if 'Annexe:' in link.attrib.get('href', ''):
                     continue
                 words.append(link.text_content())
         related[count] = {}
         related[count]['description'] = description
         related[count]['words'] = words
         section = section.getnext()
         count += 1
     return related