From 5539dcf1c9fe360a2d6429d8f9af5299cbde2918 Mon Sep 17 00:00:00 2001
From: Lambert Rosique
Date: Sat, 8 Jan 2022 19:30:18 +0100
Subject: [PATCH 1/7] Fix issue #19

---
 wiktionnaireparser/parser.py | 25 +++++++++++++------------
 1 file changed, 13 insertions(+), 12 deletions(-)

diff --git a/wiktionnaireparser/parser.py b/wiktionnaireparser/parser.py
index fbdffb5..244ddf1 100644
--- a/wiktionnaireparser/parser.py
+++ b/wiktionnaireparser/parser.py
@@ -196,22 +196,23 @@ def get_definitions(self, part_of_speech):
         part_of_speech = '#' + part_of_speech.replace(' ', '_')
         text = self._query.find(part_of_speech)[0]
         text = text.getparent()
-        while text.tag != 'ol':
+        while text is not None and text.tag != 'ol':
             # ligne de forme
             if text.tag == 'p' or text.tag == 'span':
                 self.ligne_de_forme(text)
             text = text.getnext()
-        for i, definition_bloc in enumerate(text.getchildren()):
-            raw = definition_bloc.text_content()
-            definition = raw.split('\n')[0]
-            # Catching examples
-            examples = get_examples(definition_bloc)
-            definitions[i] = {'definition': definition}
-            if examples:
-                definitions[i]['examples'] = examples
-            if definition_bloc.find('ol'):
-                subdefinitions = get_subdefinitions(definition_bloc.find('ol'))
-                definitions[i]['subdefinitions'] = subdefinitions
+        if text is not None:
+            for i, definition_bloc in enumerate(text.getchildren()):
+                raw = definition_bloc.text_content()
+                definition = raw.split('\n')[0]
+                # Catching examples
+                examples = get_examples(definition_bloc)
+                definitions[i] = {'definition': definition}
+                if examples:
+                    definitions[i]['examples'] = examples
+                if definition_bloc.find('ol'):
+                    subdefinitions = get_subdefinitions(definition_bloc.find('ol'))
+                    definitions[i]['subdefinitions'] = subdefinitions
         return definitions
 
     def get_etymology(self):
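A note on the guard used in this patch: lxml's getnext() returns None once the last sibling has been visited, so on a page with no <ol> of definitions the unguarded loop condition text.tag was evaluated on None. A minimal, self-contained sketch of the same pattern; the HTML fragment and the 'nom' id below are invented for illustration and are not the project's actual markup:

from lxml import html

# Illustrative fragment: a heading and a "ligne de forme", but no <ol> of definitions.
fragment = html.fromstring(
    "<div><h3 id='nom'>Nom commun</h3><p>ligne de forme</p></div>"
)

node = fragment.get_element_by_id('nom')
while node is not None and node.tag != 'ol':
    node = node.getnext()  # returns None after the last sibling

definitions = {}
if node is not None:  # only parse definitions when an <ol> was actually found
    for i, item in enumerate(node):
        definitions[i] = {'definition': item.text_content().split('\n')[0]}

print(definitions)  # {} -- no crash even though the page has no definition list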
From 148681c7c791d4666e0a3a9fcf34a3c1cce47492 Mon Sep 17 00:00:00 2001
From: Lambert Rosique
Date: Sat, 8 Jan 2022 20:05:07 +0100
Subject: [PATCH 2/7] Fix Issue #19

---
 wiktionnaireparser/utils.py | 34 ++++++++++++++++++----------------
 1 file changed, 18 insertions(+), 16 deletions(-)

diff --git a/wiktionnaireparser/utils.py b/wiktionnaireparser/utils.py
index 2f01461..f412f5a 100644
--- a/wiktionnaireparser/utils.py
+++ b/wiktionnaireparser/utils.py
@@ -1,6 +1,7 @@
 import re
 import json
 from contextlib import suppress
+from lxml.html import HtmlComment
 
 
 def etymology_cleaner(etymology):
@@ -31,25 +32,26 @@ def extract_related_words(section):
     """Extract related words."""
     related = {}
     count = 0
-    while section.tag != 'h3' and section.tag != 'h4':
+    while section is not None and section.tag != 'h3' and section.tag != 'h4':
         words = []
         description = ''
-        if section.cssselect('.NavContent'):
-            with suppress(IndexError):
-                description = section.cssselect('.NavHead')[0].text_content()
-            for link in section.cssselect('.NavContent a'):
-                if 'Annexe:' in link.attrib.get('href'):
-                    continue
-                words.append(link.text_content())
+        if not type(section) is HtmlComment:
+            if section.cssselect('.NavContent'):
+                with suppress(IndexError):
+                    description = section.cssselect('.NavHead')[0].text_content()
+                for link in section.cssselect('.NavContent a'):
+                    if 'Annexe:' in link.attrib.get('href'):
+                        continue
+                    words.append(link.text_content())
 
-        else:
-            for link in section.cssselect('a'):
-                if 'Annexe:' in link.attrib.get('href'):
-                    continue
-                words.append(link.text_content())
-        related[count] = {}
-        related[count]['description'] = description
-        related[count]['words'] = words
+            else:
+                for link in section.cssselect('a'):
+                    if 'Annexe:' in link.attrib.get('href'):
+                        continue
+                    words.append(link.text_content())
+            related[count] = {}
+            related[count]['description'] = description
+            related[count]['words'] = words
         section = section.getnext()
         count += 1
     return related

From 249b02baa15beb3849fff73cd2897693156ed108 Mon Sep 17 00:00:00 2001
From: Lambert Rosique
Date: Sat, 8 Jan 2022 20:10:53 +0100
Subject: [PATCH 3/7] Fix Issue #22

---
 wiktionnaireparser/parser.py | 42 ++++++++++++++++++++--------------------
 1 file changed, 22 insertions(+), 20 deletions(-)

diff --git a/wiktionnaireparser/parser.py b/wiktionnaireparser/parser.py
index 244ddf1..2fdf7d2 100644
--- a/wiktionnaireparser/parser.py
+++ b/wiktionnaireparser/parser.py
@@ -274,26 +274,28 @@ def get_translations(self, translation_id):
 
         lines = section.getnext().cssselect('li')
         for line in lines:
-            language = line.find('span').text_content()
-            transl = []
-            links = line.find('a')
-            while links is not None:
-                '''
-                try:
-                    if links.attrib.get('class').endswith('-Latn'):
-                        links = links.getnext()
-                        continue
-                except AttributeError:
-                    pass
-                '''
-                if links.attrib.get('class') != 'trad-exposant' and links.attrib:
-                    if links.attrib.get('class') is None:
-                        transl.append(links.text_content())
-                    # Ignore translittérations
-                    elif not links.attrib.get('class').endswith('-Latn'):
-                        transl.append(links.text_content())
-                links = links.getnext()
-            result[language] = transl
+            language = line.find('span')
+            if language is not None:
+                language = language.text_content()
+                transl = []
+                links = line.find('a')
+                while links is not None:
+                    '''
+                    try:
+                        if links.attrib.get('class').endswith('-Latn'):
+                            links = links.getnext()
+                            continue
+                    except AttributeError:
+                        pass
+                    '''
+                    if links.attrib.get('class') != 'trad-exposant' and links.attrib:
+                        if links.attrib.get('class') is None:
+                            transl.append(links.text_content())
+                        # Ignore translittérations
+                        elif not links.attrib.get('class').endswith('-Latn'):
+                            transl.append(links.text_content())
+                    links = links.getnext()
+                result[language] = transl
 
         return result
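A note on the None check introduced for the translations: Element.find() returns None when a list item has no matching child, which is what happens for rows of the translations box that carry no language span. A small standalone illustration of the same pattern; the HTML below is invented for the example and is not actual Wiktionnaire markup:

from lxml import html

box = html.fromstring(
    "<ul>"
    "<li><span>Anglais</span> : <a>word</a></li>"
    "<li>(ligne sans nom de langue)</li>"
    "</ul>"
)

result = {}
for line in box.cssselect('li'):
    language = line.find('span')
    if language is None:  # the second <li> has no <span>: skip it instead of crashing
        continue
    result[language.text_content()] = [a.text_content() for a in line.findall('a')]

print(result)  # {'Anglais': ['word']}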
From 58b7cee3cd30767f4c40d2ad0193018dcfa5ce33 Mon Sep 17 00:00:00 2001
From: Lambert Rosique
Date: Sun, 9 Jan 2022 01:54:46 +0100
Subject: [PATCH 4/7] Fix issue #24

---
 wiktionnaireparser/parser.py | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/wiktionnaireparser/parser.py b/wiktionnaireparser/parser.py
index 2fdf7d2..51e6d91 100644
--- a/wiktionnaireparser/parser.py
+++ b/wiktionnaireparser/parser.py
@@ -99,14 +99,15 @@ def _find_lang_sections_id(self):
         self.sections_id = {}
         for section in lang.getnext().getchildren():  # 'li'
             section_id = section.find('a').attrib['href']
-            # Subsections?
-            if section.find('ul') is None:
-                self.sections_id[section_id] = []
-                continue
-            subsections = []
-            for subsection in section.find('ul'):
-                subsections.append(subsection.find('a').attrib['href'])
-            self.sections_id[section_id] = subsections
+            if not "*" in section_id:
+                # Subsections?
+                if section.find('ul') is None:
+                    self.sections_id[section_id] = []
+                    continue
+                subsections = []
+                for subsection in section.find('ul'):
+                    subsections.append(subsection.find('a').attrib['href'])
+                self.sections_id[section_id] = subsections
 
         return self.sections_id
 
@@ -133,7 +134,7 @@ def get_parts_of_speech(self):
         parts_of_speech = {}
         useless_sections = (
             r'Étymologie', r'Prononciation', r'Références', r'Voir_aussi',
-            r'Anagrammes', r'Liens_externes'
+            r'Anagrammes', r'Liens_externes', r'Erreurs*',
         )
         sections = filter_sections_id(self.sections_id.keys(), useless_sections)
         for section_name in sections:

From 8eb34e6f99d15395788754eeab3540cb41eb00c8 Mon Sep 17 00:00:00 2001
From: Lambert Rosique
Date: Sun, 9 Jan 2022 02:19:35 +0100
Subject: [PATCH 5/7] Fix 2nd part of issue #24 with "_"

---
 wiktionnaireparser/parser.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/wiktionnaireparser/parser.py b/wiktionnaireparser/parser.py
index 51e6d91..2b2415e 100644
--- a/wiktionnaireparser/parser.py
+++ b/wiktionnaireparser/parser.py
@@ -99,7 +99,7 @@ def _find_lang_sections_id(self):
         self.sections_id = {}
         for section in lang.getnext().getchildren():  # 'li'
             section_id = section.find('a').attrib['href']
-            if not "*" in section_id:
+            if not "*" in section_id and not "_" in section_id:
                 # Subsections?
                 if section.find('ul') is None:
                     self.sections_id[section_id] = []
@@ -134,7 +134,7 @@ def get_parts_of_speech(self):
         parts_of_speech = {}
         useless_sections = (
             r'Étymologie', r'Prononciation', r'Références', r'Voir_aussi',
-            r'Anagrammes', r'Liens_externes', r'Erreurs*',
+            r'Anagrammes', r'Liens_externes', r'Erreurs*', r'=_Synonymes'
         )
         sections = filter_sections_id(self.sections_id.keys(), useless_sections)
         for section_name in sections:

From c7597a4820aace2f304b6c34840d0e609cc0c5a7 Mon Sep 17 00:00:00 2001
From: Surkal
Date: Sat, 19 Feb 2022 14:22:34 +0100
Subject: [PATCH 6/7] perf: make the default requirements file lighter

---
 requirements-dev.txt | 5 +++++
 requirements.txt     | 2 --
 2 files changed, 5 insertions(+), 2 deletions(-)
 create mode 100644 requirements-dev.txt

diff --git a/requirements-dev.txt b/requirements-dev.txt
new file mode 100644
index 0000000..756aab5
--- /dev/null
+++ b/requirements-dev.txt
@@ -0,0 +1,5 @@
+pytest==6.1.1
+wikitextparser==0.47.0
+coverage==5.3
+requests==2.24.0
+pyquery==1.4.1
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index cc0f971..3380ac2 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,3 @@
-pytest==6.1.1
 wikitextparser==0.47.0
-coverage==5.3
 requests==2.24.0
 pyquery==1.4.1

From b90b257f6c849e0799e879bc93f98674be7800c9 Mon Sep 17 00:00:00 2001
From: Surkal
Date: Sun, 20 Feb 2022 13:22:29 +0100
Subject: [PATCH 7/7] style: use isinstance() rather than type()

---
 wiktionnaireparser/utils.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/wiktionnaireparser/utils.py b/wiktionnaireparser/utils.py
index cdebe9c..9a63d07 100644
--- a/wiktionnaireparser/utils.py
+++ b/wiktionnaireparser/utils.py
@@ -3,6 +3,7 @@
 import re
 import json
 from contextlib import suppress
+
 from lxml.html import HtmlComment
 
 
@@ -38,7 +39,7 @@ def extract_related_words(section):
     while section is not None and section.tag not in ('h3', 'h4'):
         words = []
         description = ''
-        if not type(section) is HtmlComment:
+        if not isinstance(section, HtmlComment):
             if section.cssselect('.NavContent'):
                 with suppress(IndexError):
                     description = section.cssselect('.NavHead')[0].text_content()
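A closing note on the last change: isinstance() is the idiomatic type check and, unlike a type() identity comparison, it also accepts subclasses. A minimal sketch of why the comment guard matters when iterating an lxml tree; the HTML is invented for the example and is not the project's actual markup:

from lxml import html
from lxml.html import HtmlComment

tree = html.fromstring("<div><!-- un commentaire --><p>texte</p></div>")

for child in tree:
    # Comments come back as HtmlComment nodes when iterating over children;
    # skip them so only real elements are processed, mirroring extract_related_words.
    if isinstance(child, HtmlComment):
        continue
    print(child.tag)  # only 'p' is printed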