diff --git a/iscc_core/check.py b/iscc_core/check.py
index 7290028..c4ec650 100644
--- a/iscc_core/check.py
+++ b/iscc_core/check.py
@@ -1,7 +1,6 @@
 # -*- coding: utf-8 -*-
 """Inspect lib environment/installation"""
 import inspect
-from loguru import logger as log
 
 
 __all__ = ["turbo"]
@@ -15,7 +14,6 @@ def turbo():  # pragma: no cover
     modules = (cdc, minhash, simhash, dct, wtahash)
     for module in modules:
         module_file = inspect.getfile(module)
-        log.debug(f"Module {module.__name__} file: {module_file}")
         if module_file.endswith(".py") or module_file.endswith(".pyc"):
             return False
     return True
diff --git a/poetry.lock b/poetry.lock
index 129ac90..b2640ba 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1229,6 +1229,47 @@ tomli = {version = ">=1.2.2", markers = "python_version < \"3.11\""}
 [package.extras]
 poetry-plugin = ["poetry (>=1.0,<2.0)"]
 
+[[package]]
+name = "psutil"
+version = "6.1.0"
+description = "Cross-platform lib for process and system monitoring in Python."
+optional = false
+python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7"
+files = [
+    {file = "psutil-6.1.0-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:ff34df86226c0227c52f38b919213157588a678d049688eded74c76c8ba4a5d0"},
+    {file = "psutil-6.1.0-cp27-cp27m-manylinux2010_i686.whl", hash = "sha256:c0e0c00aa18ca2d3b2b991643b799a15fc8f0563d2ebb6040f64ce8dc027b942"},
+    {file = "psutil-6.1.0-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:000d1d1ebd634b4efb383f4034437384e44a6d455260aaee2eca1e9c1b55f047"},
+    {file = "psutil-6.1.0-cp27-cp27mu-manylinux2010_i686.whl", hash = "sha256:5cd2bcdc75b452ba2e10f0e8ecc0b57b827dd5d7aaffbc6821b2a9a242823a76"},
+    {file = "psutil-6.1.0-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:045f00a43c737f960d273a83973b2511430d61f283a44c96bf13a6e829ba8fdc"},
+    {file = "psutil-6.1.0-cp27-none-win32.whl", hash = "sha256:9118f27452b70bb1d9ab3198c1f626c2499384935aaf55388211ad982611407e"},
+    {file = "psutil-6.1.0-cp27-none-win_amd64.whl", hash = "sha256:a8506f6119cff7015678e2bce904a4da21025cc70ad283a53b099e7620061d85"},
+    {file = "psutil-6.1.0-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:6e2dcd475ce8b80522e51d923d10c7871e45f20918e027ab682f94f1c6351688"},
+    {file = "psutil-6.1.0-cp36-abi3-macosx_11_0_arm64.whl", hash = "sha256:0895b8414afafc526712c498bd9de2b063deaac4021a3b3c34566283464aff8e"},
+    {file = "psutil-6.1.0-cp36-abi3-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9dcbfce5d89f1d1f2546a2090f4fcf87c7f669d1d90aacb7d7582addece9fb38"},
+    {file = "psutil-6.1.0-cp36-abi3-manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:498c6979f9c6637ebc3a73b3f87f9eb1ec24e1ce53a7c5173b8508981614a90b"},
+    {file = "psutil-6.1.0-cp36-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d905186d647b16755a800e7263d43df08b790d709d575105d419f8b6ef65423a"},
+    {file = "psutil-6.1.0-cp36-cp36m-win32.whl", hash = "sha256:6d3fbbc8d23fcdcb500d2c9f94e07b1342df8ed71b948a2649b5cb060a7c94ca"},
+    {file = "psutil-6.1.0-cp36-cp36m-win_amd64.whl", hash = "sha256:1209036fbd0421afde505a4879dee3b2fd7b1e14fee81c0069807adcbbcca747"},
+    {file = "psutil-6.1.0-cp37-abi3-win32.whl", hash = "sha256:1ad45a1f5d0b608253b11508f80940985d1d0c8f6111b5cb637533a0e6ddc13e"},
+    {file = "psutil-6.1.0-cp37-abi3-win_amd64.whl", hash = "sha256:a8fb3752b491d246034fa4d279ff076501588ce8cbcdbb62c32fd7a377d996be"},
+    {file = "psutil-6.1.0.tar.gz", hash = "sha256:353815f59a7f64cdaca1c0307ee13558a0512f6db064e92fe833784f08539c7a"},
+]
+
+[package.extras]
+dev = ["black", "check-manifest", "coverage", "packaging", "pylint", "pyperf", "pypinfo", "pytest-cov", "requests", "rstcheck", "ruff", "sphinx", "sphinx_rtd_theme", "toml-sort", "twine", "virtualenv", "wheel"]
+test = ["pytest", "pytest-xdist", "setuptools"]
+
+[[package]]
+name = "py-cpuinfo"
+version = "9.0.0"
+description = "Get CPU info with pure Python"
+optional = false
+python-versions = "*"
+files = [
+    {file = "py-cpuinfo-9.0.0.tar.gz", hash = "sha256:3cdbbf3fac90dc6f118bfd64384f309edeadd902d7c8fb17f02ffa1fc3f49690"},
+    {file = "py_cpuinfo-9.0.0-py3-none-any.whl", hash = "sha256:859625bc251f64e21f077d099d4162689c762b5d6a4c3c97553d56241c9674d5"},
+]
+
 [[package]]
 name = "pybase64"
 version = "1.4.0"
@@ -2130,4 +2171,4 @@ turbo = ["cython"]
 [metadata]
 lock-version = "2.0"
 python-versions = ">=3.9,<4.0"
-content-hash = "d50b1b614fa1e3a976ce7802a02cf95617e0c4ed41c3db3ee431b83a3dc154e8"
+content-hash = "282cef5d0a2683c6404dbb0475c3bd4edb4dd044969ef4884770441b81c19bf3"
diff --git a/pyproject.toml b/pyproject.toml
index b1817a4..bb0ff4b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -70,6 +70,10 @@ black = "*"
 ruff = "*"
 bandit = "*"
 
+[tool.poetry.group.benchmark.dependencies]
+psutil = "*"
+py-cpuinfo = "*"
+
 [tool.poetry.group.docs.dependencies]
 mkdocs-material = "^9.0"
 mkdocstrings-python = "<1.1"
diff --git a/tests/benchmark.py b/tests/benchmark.py
new file mode 100644
index 0000000..ea4ae04
--- /dev/null
+++ b/tests/benchmark.py
@@ -0,0 +1,151 @@
+# -*- coding: utf-8 -*-
+"""Benchmark gen_text_code throughput and resident-memory growth."""
+import time
+import psutil
+import random
+import string
+import unicodedata
+import platform
+import iscc_core as ic
+from iscc_core.code_content_text import gen_text_code
+
+try:
+    import cpuinfo
+except ImportError:
+    print("cpuinfo module not found. Install it using: pip install py-cpuinfo")
+    cpuinfo = None
+
+
+def generate_text(length, seed=42):
+    """
+    Build a reproducible pseudo-random string of `length` characters.
+
+    Each character is picked from one of four equally likely kinds: a code point
+    from the Unicode ranges below, an ASCII digit, ASCII punctuation or a space.
+
+    :param int length: Number of characters to generate.
+    :param int seed: Seed for the random generator; equal seeds give equal text.
+    :return: The generated text (empty if `length` is not positive).
+    :rtype: str
+    """
+    # A private generator keeps the output reproducible without reseeding the
+    # module-level `random` state as a side effect.
+    rng = random.Random(seed)
+    unicode_ranges = [
+        (0x0021, 0x007E),  # Basic Latin
+        (0x00A1, 0x00FF),  # Latin-1 Supplement
+        (0x0100, 0x017F),  # Latin Extended-A
+        (0x0180, 0x024F),  # Latin Extended-B
+        (0x0370, 0x03FF),  # Greek and Coptic
+        (0x0400, 0x04FF),  # Cyrillic
+        (0x3040, 0x309F),  # Hiragana
+        (0x4E00, 0x9FFF),  # CJK Unified Ideographs (subset)
+    ]
+
+    chars = []
+    # Every branch yields exactly one character, so the list length equals the text
+    # length and the text does not need to be re-joined on each iteration.
+    while len(chars) < length:
+        char_type = rng.choice(["letter", "digit", "punctuation", "whitespace"])
+        if char_type == "letter":
+            range_start, range_end = rng.choice(unicode_ranges)
+            char = chr(rng.randint(range_start, range_end))
+        elif char_type == "digit":
+            char = rng.choice(string.digits)
+        elif char_type == "punctuation":
+            char = rng.choice(string.punctuation)
+        else:
+            char = " "
+        chars.append(char)
+
+    return "".join(chars)
+
+
+def benchmark_gen_text_code(text_length, iterations=100):
+    """
+    Time repeated gen_text_code calls on generated text.
+
+    :param int text_length: Number of characters in the generated input text.
+    :param int iterations: Number of timed gen_text_code calls.
+    :return: Pages per second (3000 characters per page) and the change in resident
+        memory in MB between the start and the end of the run.
+    :rtype: tuple
+    """
+    text = generate_text(text_length)
+    process = psutil.Process()
+    memory_usage = process.memory_info().rss / 1024 / 1024  # in MB
+
+    # perf_counter is monotonic and high resolution; time.time follows the wall
+    # clock, which can be adjusted while the benchmark runs.
+    start_time = time.perf_counter()
+    for _ in range(iterations):
+        gen_text_code(text)
+    total_time = time.perf_counter() - start_time
+    # Sampled once after the loop: this is the final RSS, not a tracked peak.
+    max_memory = process.memory_info().rss / 1024 / 1024  # in MB
+
+    pages = iterations * text_length / 3000
+    # Avoid ZeroDivisionError when nothing measurable ran (e.g. iterations=0).
+    pages_per_second = pages / total_time if total_time > 0 else 0.0
+    memory_increase = max_memory - memory_usage
+
+    return pages_per_second, memory_increase
+
+
+def main():
+    """Run the benchmark, then print system information and the results."""
+    text_length = 3000 * 10  # 10 pages
+    iterations = 10
+
+    pages_per_second, memory_increase = benchmark_gen_text_code(text_length, iterations)
+
+    print("System Information:")
+    print(f"OS: {platform.system()} {platform.release()}")
+    print(f"Python version: {platform.python_version()}")
+    print(f"Processor: {platform.processor()}")
+    print(
+        f"CPU cores: {psutil.cpu_count(logical=False)} (Physical),"
+        f" {psutil.cpu_count(logical=True)} (Logical)"
+    )
+    # cpu_freq() may return None when the frequency cannot be determined.
+    cpu_freq = psutil.cpu_freq()
+    if cpu_freq is not None:
+        print(f"CPU speed: {cpu_freq.current / 1000:.2f} GHz")
+    else:
+        print("CPU speed: unknown")
+    print(f"Total memory: {psutil.virtual_memory().total / (1024**3):.2f} GB")
+
+    if cpuinfo:
+        cpu_info = cpuinfo.get_cpu_info()
+        print("\nCPU Features:")
+        # Assumes py-cpuinfo may omit fields it cannot detect -- TODO confirm.
+        print(f"Brand: {cpu_info.get('brand_raw', 'unknown')}")
+        print(f"Architecture: {cpu_info.get('arch', 'unknown')}")
+        print(f"Bits: {cpu_info.get('bits', 'unknown')}")
+
+        # SIMD and other relevant features
+        relevant_flags = [
+            "sse",
+            "sse2",
+            "sse3",
+            "ssse3",
+            "sse4_1",
+            "sse4_2",
+            "avx",
+            "avx2",
+            "fma3",
+            "mmx",
+            "neon",
+        ]
+        detected_flags = set(cpu_info.get("flags") or ())
+        supported_flags = [flag for flag in relevant_flags if flag in detected_flags]
+        print(f"Instructions: {', '.join(supported_flags)}")
+
+    print("\nBenchmark results for gen_text_code:")
+    print(f"Pages per second: {pages_per_second:.2f}")
+    print(f"Max memory increase: {memory_increase:.2f} MB")
+    print(f"Cython extension modules used: {ic.turbo()}")
+
+
+if __name__ == "__main__":
+    main()