From 13f3d439cb60e35e3600f0443db7199c70c944b8 Mon Sep 17 00:00:00 2001 From: Jay Date: Mon, 31 Jul 2023 21:13:28 +0530 Subject: [PATCH 1/7] ref: added logic to fallback to ahocode - made changes in match_unknown.py and match_aho.py to fallback to ahocode. Signed-off-by: 35C4n0r --- setup.cfg | 1 + src/licensedcode/match_aho.py | 5 ++++- src/licensedcode/match_unknown.py | 5 ++++- 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/setup.cfg b/setup.cfg index abaaa0fc497..1793993f2a3 100644 --- a/setup.cfg +++ b/setup.cfg @@ -65,6 +65,7 @@ python_requires = >=3.7 install_requires = attrs >= 18.1,!=20.1.0;python_version<'3.11' attrs >= 22.1.0;python_version>='3.11' + ahocode Beautifulsoup4 >= 4.0.0 boolean.py >= 4.0 chardet >= 3.0.0 diff --git a/src/licensedcode/match_aho.py b/src/licensedcode/match_aho.py index 7a46821a31e..cf2dcb9cafc 100644 --- a/src/licensedcode/match_aho.py +++ b/src/licensedcode/match_aho.py @@ -9,7 +9,10 @@ from itertools import groupby -import ahocorasick +try: + import ahocorasick +except ImportError: + import ahocode from licensedcode import SMALL_RULE from licensedcode.match import LicenseMatch diff --git a/src/licensedcode/match_unknown.py b/src/licensedcode/match_unknown.py index b83a62a01e0..bd219698e11 100644 --- a/src/licensedcode/match_unknown.py +++ b/src/licensedcode/match_unknown.py @@ -7,7 +7,10 @@ # See https://aboutcode.org for more information about nexB OSS projects. # -import ahocorasick +try: + import ahocorasick +except ImportError: + import ahocode from licensedcode import tokenize from licensedcode.models import UnknownRule From 03a8a5a5c8852d0498b3f75e8036067ca1c8d10c Mon Sep 17 00:00:00 2001 From: Jay Date: Mon, 31 Jul 2023 21:29:14 +0530 Subject: [PATCH 2/7] ref: added logic to fallback to bitcode - added imports in spans.py, index.py, query.py and match_set.py to fallback to bitcode. 
Signed-off-by: 35C4n0r --- src/licensedcode/index.py | 5 ++++- src/licensedcode/match_aho.py | 2 +- src/licensedcode/match_set.py | 5 ++++- src/licensedcode/match_unknown.py | 2 +- src/licensedcode/query.py | 5 ++++- src/licensedcode/spans.py | 5 ++++- 6 files changed, 18 insertions(+), 6 deletions(-) diff --git a/src/licensedcode/index.py b/src/licensedcode/index.py index 267a15f47be..5c8b1744c0a 100644 --- a/src/licensedcode/index.py +++ b/src/licensedcode/index.py @@ -17,7 +17,10 @@ import sys from time import time -from intbitset import intbitset +try: + from intbitset import intbitset +except ImportError: + from bitcode.bitcode import intbitset from licensedcode import SMALL_RULE from licensedcode import TINY_RULE diff --git a/src/licensedcode/match_aho.py b/src/licensedcode/match_aho.py index cf2dcb9cafc..831e9f33a5f 100644 --- a/src/licensedcode/match_aho.py +++ b/src/licensedcode/match_aho.py @@ -12,7 +12,7 @@ try: import ahocorasick except ImportError: - import ahocode + import ahocode as ahocorasick from licensedcode import SMALL_RULE from licensedcode.match import LicenseMatch diff --git a/src/licensedcode/match_set.py b/src/licensedcode/match_set.py index 105eeb9319e..f79b71a0939 100644 --- a/src/licensedcode/match_set.py +++ b/src/licensedcode/match_set.py @@ -12,7 +12,10 @@ from functools import partial from itertools import groupby -from intbitset import intbitset +try: + from intbitset import intbitset +except ImportError: + from bitcode.bitcode import intbitset from licensedcode.tokenize import ngrams diff --git a/src/licensedcode/match_unknown.py b/src/licensedcode/match_unknown.py index bd219698e11..cf3821df4e3 100644 --- a/src/licensedcode/match_unknown.py +++ b/src/licensedcode/match_unknown.py @@ -10,7 +10,7 @@ try: import ahocorasick except ImportError: - import ahocode + import ahocode as ahocorasick from licensedcode import tokenize from licensedcode.models import UnknownRule diff --git a/src/licensedcode/query.py 
b/src/licensedcode/query.py index c0fc20d6339..cef4f228024 100644 --- a/src/licensedcode/query.py +++ b/src/licensedcode/query.py @@ -15,7 +15,10 @@ from functools import partial from itertools import chain -from intbitset import intbitset +try: + from intbitset import intbitset +except ImportError: + from bitcode.bitcode import intbitset import typecode diff --git a/src/licensedcode/spans.py b/src/licensedcode/spans.py index 2b0348b9832..e15acdfc3f5 100644 --- a/src/licensedcode/spans.py +++ b/src/licensedcode/spans.py @@ -31,7 +31,10 @@ from itertools import count from itertools import groupby -from intbitset import intbitset +try: + from intbitset import intbitset +except ImportError: + from bitcode.bitcode import intbitset """ Ranges and intervals of integers using bitmaps. From 994a7fa943c5100cd1c0327131895ed3ca98b3e2 Mon Sep 17 00:00:00 2001 From: Jay Date: Mon, 31 Jul 2023 21:44:41 +0530 Subject: [PATCH 3/7] ref: added bitcode to setup.cfg Signed-off-by: 35C4n0r --- setup.cfg | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.cfg b/setup.cfg index 1793993f2a3..f5ae75ea4b0 100644 --- a/setup.cfg +++ b/setup.cfg @@ -67,6 +67,7 @@ install_requires = attrs >= 22.1.0;python_version>='3.11' ahocode Beautifulsoup4 >= 4.0.0 + bitcode boolean.py >= 4.0 chardet >= 3.0.0 click >= 6.7, !=7.0 From 61973d32bd1ea36e2f7f53c0212ff2b524273c62 Mon Sep 17 00:00:00 2001 From: 35C4n0r Date: Sun, 27 Aug 2023 22:44:53 +0530 Subject: [PATCH 4/7] ref: refactored Imports to fallback to sanexml. - Added try catch blocks for import. 
Signed-off-by: Jay --- setup.cfg | 1 + src/formattedcode/output_cyclonedx.py | 5 ++++- src/packagedcode/maven.py | 14 ++++++++++---- tests/formattedcode/test_output_cyclonedx.py | 5 ++++- 4 files changed, 19 insertions(+), 6 deletions(-) diff --git a/setup.cfg b/setup.cfg index f5ae75ea4b0..fb461257287 100644 --- a/setup.cfg +++ b/setup.cfg @@ -106,6 +106,7 @@ install_requires = pygments pymaven_patch >= 0.2.8 requests >= 2.7.0 + sanexml saneyaml >= 0.6.0 spdx_tools == 0.7.0rc0 text_unidecode >= 1.0 diff --git a/src/formattedcode/output_cyclonedx.py b/src/formattedcode/output_cyclonedx.py index 236c4152118..845434db823 100644 --- a/src/formattedcode/output_cyclonedx.py +++ b/src/formattedcode/output_cyclonedx.py @@ -22,7 +22,10 @@ import attr from commoncode.cliutils import OUTPUT_GROUP from commoncode.cliutils import PluggableCommandLineOption -from lxml import etree +try: + from lxml import etree +except ImportError: + from sanexml import etree from plugincode.output import OutputPlugin from plugincode.output import output_impl diff --git a/src/packagedcode/maven.py b/src/packagedcode/maven.py index b5d521dc44a..fee2e72bd30 100644 --- a/src/packagedcode/maven.py +++ b/src/packagedcode/maven.py @@ -12,7 +12,10 @@ from pprint import pformat import javaproperties -import lxml +try: + from lxml import etree +except ImportError: + from sanexml import etree from fnmatch import fnmatchcase from packageurl import PackageURL from pymaven import artifact @@ -425,16 +428,17 @@ def __init__(self, location=None, text=None): xml_text = analysis.unicode_text(location) else: xml_text = text + xml_text = xml_text.strip().strip("'").strip("\n'") xml_text = strip_namespace(xml_text) xml_text = xml_text.encode('utf-8') if TRACE: logger.debug('MavenPom.__init__: xml_text: {}'.format(xml_text)) - self._pom_data = lxml.etree.fromstring(xml_text, parser=pom.POM_PARSER) # NOQA + self._pom_data = etree.fromstring(xml_text, parser=pom.POM_PARSER()) # NOQA # collect and then remove XML 
comments from the XML elements tree self.comments = self._get_comments() - lxml.etree.strip_tags(self._pom_data, lxml.etree.Comment) # NOQA + etree.strip_tags(self._pom_data, etree.Comment) # NOQA # FIXME: we do not use a client for now. # There are pending issues at pymaven to address this @@ -753,7 +757,9 @@ def _get_comments(self, xml=None): """Return a list of comment texts or an empty list.""" if xml is None: xml = self.pom_data - comments = [c.text for c in xml.xpath('//comment()')] + expression = etree.XPath('//comment()') + children = expression(xml) + comments = [c.text for c in children] return [c.strip() for c in comments if c and c.strip()] def _find_licenses(self): diff --git a/tests/formattedcode/test_output_cyclonedx.py b/tests/formattedcode/test_output_cyclonedx.py index 7d51da291ff..7e9aa4db97b 100644 --- a/tests/formattedcode/test_output_cyclonedx.py +++ b/tests/formattedcode/test_output_cyclonedx.py @@ -13,7 +13,10 @@ import os import saneyaml -from lxml import etree +try: + from lxml import etree +except ImportError: + from sanexml import etree from commoncode.testcase import FileDrivenTesting from formattedcode.output_cyclonedx import CycloneDxComponent From 1d7dc8a5e501f6b1b048ffbe6e2e26b4e51b53b9 Mon Sep 17 00:00:00 2001 From: Jay Date: Mon, 28 Aug 2023 21:38:26 +0530 Subject: [PATCH 5/7] Revert "ref: refactored Imports to fallback to sanexml." This reverts commit 61973d32bd1ea36e2f7f53c0212ff2b524273c62. 
--- setup.cfg | 2 -- src/formattedcode/output_cyclonedx.py | 5 +---- src/packagedcode/maven.py | 14 ++++---------- tests/formattedcode/test_output_cyclonedx.py | 5 +---- 4 files changed, 6 insertions(+), 20 deletions(-) diff --git a/setup.cfg b/setup.cfg index fb461257287..1793993f2a3 100644 --- a/setup.cfg +++ b/setup.cfg @@ -67,7 +67,6 @@ install_requires = attrs >= 22.1.0;python_version>='3.11' ahocode Beautifulsoup4 >= 4.0.0 - bitcode boolean.py >= 4.0 chardet >= 3.0.0 click >= 6.7, !=7.0 @@ -106,7 +105,6 @@ install_requires = pygments pymaven_patch >= 0.2.8 requests >= 2.7.0 - sanexml saneyaml >= 0.6.0 spdx_tools == 0.7.0rc0 text_unidecode >= 1.0 diff --git a/src/formattedcode/output_cyclonedx.py b/src/formattedcode/output_cyclonedx.py index 845434db823..236c4152118 100644 --- a/src/formattedcode/output_cyclonedx.py +++ b/src/formattedcode/output_cyclonedx.py @@ -22,10 +22,7 @@ import attr from commoncode.cliutils import OUTPUT_GROUP from commoncode.cliutils import PluggableCommandLineOption -try: - from lxml import etree -except ImportError: - from sanexml import etree +from lxml import etree from plugincode.output import OutputPlugin from plugincode.output import output_impl diff --git a/src/packagedcode/maven.py b/src/packagedcode/maven.py index fee2e72bd30..b5d521dc44a 100644 --- a/src/packagedcode/maven.py +++ b/src/packagedcode/maven.py @@ -12,10 +12,7 @@ from pprint import pformat import javaproperties -try: - from lxml import etree -except ImportError: - from sanexml import etree +import lxml from fnmatch import fnmatchcase from packageurl import PackageURL from pymaven import artifact @@ -428,17 +425,16 @@ def __init__(self, location=None, text=None): xml_text = analysis.unicode_text(location) else: xml_text = text - xml_text = xml_text.strip().strip("'").strip("\n'") xml_text = strip_namespace(xml_text) xml_text = xml_text.encode('utf-8') if TRACE: logger.debug('MavenPom.__init__: xml_text: {}'.format(xml_text)) - self._pom_data = 
etree.fromstring(xml_text, parser=pom.POM_PARSER()) # NOQA + self._pom_data = lxml.etree.fromstring(xml_text, parser=pom.POM_PARSER) # NOQA # collect and then remove XML comments from the XML elements tree self.comments = self._get_comments() - etree.strip_tags(self._pom_data, etree.Comment) # NOQA + lxml.etree.strip_tags(self._pom_data, lxml.etree.Comment) # NOQA # FIXME: we do not use a client for now. # There are pending issues at pymaven to address this @@ -757,9 +753,7 @@ def _get_comments(self, xml=None): """Return a list of comment texts or an empty list.""" if xml is None: xml = self.pom_data - expression = etree.XPath('//comment()') - children = expression(xml) - comments = [c.text for c in children] + comments = [c.text for c in xml.xpath('//comment()')] return [c.strip() for c in comments if c and c.strip()] def _find_licenses(self): diff --git a/tests/formattedcode/test_output_cyclonedx.py b/tests/formattedcode/test_output_cyclonedx.py index 7e9aa4db97b..7d51da291ff 100644 --- a/tests/formattedcode/test_output_cyclonedx.py +++ b/tests/formattedcode/test_output_cyclonedx.py @@ -13,10 +13,7 @@ import os import saneyaml -try: - from lxml import etree -except ImportError: - from sanexml import etree +from lxml import etree from commoncode.testcase import FileDrivenTesting from formattedcode.output_cyclonedx import CycloneDxComponent From b5cc16906d22d6ec4e6b0b0c2def8a91d43ce633 Mon Sep 17 00:00:00 2001 From: 35C4n0r Date: Tue, 29 Aug 2023 00:37:14 +0530 Subject: [PATCH 6/7] ref: added test packages to setup.cfg Signed-off-by: Jay --- setup.cfg | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index 1793993f2a3..da7487c5aba 100644 --- a/setup.cfg +++ b/setup.cfg @@ -65,8 +65,9 @@ python_requires = >=3.7 install_requires = attrs >= 18.1,!=20.1.0;python_version<'3.11' attrs >= 22.1.0;python_version>='3.11' - ahocode + ahocode-test >= 1.0.0 Beautifulsoup4 >= 4.0.0 + test-bitcode-alt >= 0.0.1 boolean.py >= 4.0 chardet >= 
3.0.0 click >= 6.7, !=7.0 From 92c8243daee6b80c3f0a1bad093cd5f1045e0edd Mon Sep 17 00:00:00 2001 From: 35C4n0r Date: Tue, 29 Aug 2023 00:57:43 +0530 Subject: [PATCH 7/7] ref: removed intbitset and pyahocorasick from setup.cfg Signed-off-by: Jay --- setup.cfg | 4 ++-- src/licensedcode/match_aho.py | 2 +- src/licensedcode/match_unknown.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/setup.cfg b/setup.cfg index da7487c5aba..e79f05573a0 100644 --- a/setup.cfg +++ b/setup.cfg @@ -82,7 +82,7 @@ install_requires = gemfileparser2 >= 0.9.0 html5lib importlib_metadata - intbitset >= 3.0.2 +; intbitset >= 3.0.2 jaraco.functools javaproperties >= 0.5 jinja2 >= 2.7.0 @@ -101,7 +101,7 @@ install_requires = pluggy >= 1.0.0 plugincode >= 32.0.0 publicsuffix2 - pyahocorasick >= 2.0.0 +; pyahocorasick >= 2.0.0 pygmars >= 0.7.0 pygments pymaven_patch >= 0.2.8 diff --git a/src/licensedcode/match_aho.py b/src/licensedcode/match_aho.py index 831e9f33a5f..59226b9acf1 100644 --- a/src/licensedcode/match_aho.py +++ b/src/licensedcode/match_aho.py @@ -12,7 +12,7 @@ try: import ahocorasick except ImportError: - import ahocode as ahocorasick + import ahocode.ahocode as ahocorasick from licensedcode import SMALL_RULE from licensedcode.match import LicenseMatch diff --git a/src/licensedcode/match_unknown.py b/src/licensedcode/match_unknown.py index cf3821df4e3..d0414d0b25a 100644 --- a/src/licensedcode/match_unknown.py +++ b/src/licensedcode/match_unknown.py @@ -10,7 +10,7 @@ try: import ahocorasick except ImportError: - import ahocode as ahocorasick + import ahocode.ahocode as ahocorasick from licensedcode import tokenize from licensedcode.models import UnknownRule