From 3e3873149ca4bebb8bc7e19f46beb154cb649550 Mon Sep 17 00:00:00 2001 From: Dmitry Bogdanov Date: Tue, 5 Dec 2023 21:49:32 +0100 Subject: [PATCH] Faster Python imports with prepackaged algo metadata Instead of relying on the ESSENTIA_PYTHON_NODOC environment variable, precompute the algorithms' metadata (__doc__ and __struct__) at the build stage and load it on import. --- src/python/essentia/meta.py | 35 ++++++++++++++++++++++++++++++++ src/python/essentia/standard.py | 34 +++++++++++++++---------------- src/python/essentia/streaming.py | 33 +++++++++++++++--------------- src/python/wscript | 20 ++++++++++++++++++ 4 files changed, 87 insertions(+), 35 deletions(-) create mode 100644 src/python/essentia/meta.py diff --git a/src/python/essentia/meta.py b/src/python/essentia/meta.py new file mode 100644 index 000000000..88484c553 --- /dev/null +++ b/src/python/essentia/meta.py @@ -0,0 +1,35 @@ +from json import dump +from os.path import join +import essentia +from ._essentia import Algorithm, StreamingAlgorithm, keys, skeys + + +def _metadata_standard(): + meta = {} + for name in keys(): + essentia.log.debug(essentia.EPython, 'Loading __doc__ and __struct__ metadata for essentia.standard class: %s' % name) + _algoInstance = Algorithm(name) + meta[name] = {} + meta[name]['__doc__'] = _algoInstance.getDoc() + meta[name]['__struct__'] = _algoInstance.getStruct() + del _algoInstance + return meta + + +def _metadata_streaming(): + meta = {} + for name in skeys(): + essentia.log.debug(essentia.EPython, 'Loading __doc__ and __struct__ metadata for essentia.streaming class: %s' % name) + _algoInstance = StreamingAlgorithm(name) + meta[name] = {} + meta[name]['__doc__'] = _algoInstance.getDoc() + meta[name]['__struct__'] = _algoInstance.getStruct() + del _algoInstance + return meta + + +def _extract_metadata(filedir): + """ Loads algorithms' metadata (__doc__ and __struct__) from the C extension + and stores it to files in a filedir""" + dump(_metadata_standard(), open(join(filedir, 'standard.meta.json'), 'w')) + 
dump(_metadata_streaming(), open(join(filedir, 'streaming.meta.json'), 'w')) diff --git a/src/python/essentia/standard.py b/src/python/essentia/standard.py index d148b855e..34f113898 100644 --- a/src/python/essentia/standard.py +++ b/src/python/essentia/standard.py @@ -22,27 +22,16 @@ import sys as _sys from ._essentia import keys as algorithmNames, info as algorithmInfo from copy import copy -from os import getenv +import os.path +import json -# Whether to skip loading algorithms for reading their metadata (faster import). -ESSENTIA_PYTHON_NODOC = getenv('ESSENTIA_PYTHON_NODOC', False) -ESSENTIA_PYTHON_NODOC = (ESSENTIA_PYTHON_NODOC == 'True' or - ESSENTIA_PYTHON_NODOC == 'true' or - ESSENTIA_PYTHON_NODOC == '1') - # given an essentia algorithm name, create the corresponding class -def _create_essentia_class(name, moduleName = __name__): +def _create_essentia_class(name, meta, moduleName = __name__): essentia.log.debug(essentia.EPython, 'Creating essentia.standard class: %s' % name) - if not ESSENTIA_PYTHON_NODOC or name == "FrameCutter": - _algoInstance = _essentia.Algorithm(name) - _algoDoc = _algoInstance.getDoc() - _algoStruct = _algoInstance.getStruct() - del _algoInstance - else: - _algoDoc = None - _algoStruct = None + _algoDoc = meta[name]['__doc__'] + _algoStruct = meta[name]['__struct__'] class Algo(_essentia.Algorithm): __doc__ = _algoDoc @@ -147,8 +136,17 @@ def __str__(self): # load all classes into python def _reloadAlgorithms(moduleName = __name__): - for name in _essentia.keys(): - _create_essentia_class(name, moduleName) + meta_file = 'standard.meta.json' + essentia.log.debug(essentia.EPython, f'Loading __doc__ and __struct__ metadata for essentia.standard from {meta_file}') + # Looking for a metadata file in the same directory as `standard.py` + dir_path = os.path.dirname(os.path.realpath(__file__)) + file_path = os.path.join(dir_path, meta_file) + with open(file_path, 'r') as f: + meta = json.load(f) + + for name in algorithmNames(): + 
_create_essentia_class(name, meta, moduleName) + _reloadAlgorithms() diff --git a/src/python/essentia/streaming.py b/src/python/essentia/streaming.py index 0fdbec29e..f3acbbf59 100644 --- a/src/python/essentia/streaming.py +++ b/src/python/essentia/streaming.py @@ -21,15 +21,10 @@ import sys as _sys from . import common as _c from ._essentia import skeys as algorithmNames, sinfo as algorithmInfo -from os import getenv +import os.path +import json -# Whether to skip loading algorithms for reading their metadata (faster import). -ESSENTIA_PYTHON_NODOC = getenv('ESSENTIA_PYTHON_NODOC', False) -ESSENTIA_PYTHON_NODOC = (ESSENTIA_PYTHON_NODOC == 'True' or - ESSENTIA_PYTHON_NODOC == 'true' or - ESSENTIA_PYTHON_NODOC == '1') - # Used as a place-holder for sources and sinks, implements the right shift # operator class _StreamConnector: @@ -144,17 +139,11 @@ def totalProduced(self): -def _create_streaming_algo(givenname): +def _create_streaming_algo(givenname, meta): essentia.log.debug(essentia.EPython, 'Creating essentia.streaming class: %s' % givenname) - if not ESSENTIA_PYTHON_NODOC or givenname == 'FrameCutter': - _algoInstance = _essentia.StreamingAlgorithm(givenname) - _algoDoc = _algoInstance.getDoc() - _algoStruct = _algoInstance.getStruct() - del _algoInstance - else: - _algoDoc = None - _algoStruct = None + _algoDoc = meta[givenname]['__doc__'] + _algoStruct = meta[givenname]['__struct__'] class StreamingAlgo(_essentia.StreamingAlgorithm): __doc__ = _algoDoc @@ -207,11 +196,21 @@ def configure(self, **kwargs): # load all streaming algorithms into module def _reloadStreamingAlgorithms(): + meta_file = 'streaming.meta.json' + essentia.log.debug(essentia.EPython, f'Loading __doc__ and __struct__ metadata for essentia.streaming from {meta_file}') + # Looking for a metadata file in the same directory as `streaming.py` + dir_path = os.path.dirname(os.path.realpath(__file__)) + file_path = os.path.join(dir_path, meta_file) + with open(file_path, 'r') as f: + meta = 
json.load(f) + for name in algorithmNames(): - _create_streaming_algo(name) + _create_streaming_algo(name, meta) + _reloadStreamingAlgorithms() + # This subclass provides some more functionality for VectorInput class VectorInput(_essentia.VectorInput): __doc__ = 'VectorInput v1.0\n\n\n'+\ diff --git a/src/python/wscript b/src/python/wscript index 1bf9147aa..56d707b89 100644 --- a/src/python/wscript +++ b/src/python/wscript @@ -5,6 +5,7 @@ from __future__ import print_function import distutils.sysconfig import os +import sys def options(ctx): @@ -58,5 +59,24 @@ def build(ctx): use = ctx.env.USE_LIBS if ctx.env.ONLY_PYTHON else 'essentia ' #+ ctx.env.USE_LIBS ) + # Create local python package folder. + print("Precompute algorithms' help metadata (__doc__ and __struct__)") + PYTHONPATH = 'build/python' + os.system(f'rm -rf {PYTHONPATH}') + os.system(f'mkdir -p {PYTHONPATH}') + os.system(f'cp -r src/python/essentia {PYTHONPATH}/') + + # TODO These filenames only work for Linux. + os.system(f'cp build/src/python/_essentia*.so {PYTHONPATH}/essentia') + os.system(f'cp build/src/libessentia.so {PYTHONPATH}/essentia') + + pythonpath = os.path.abspath(PYTHONPATH) + ldpath = os.path.join(pythonpath, 'essentia') + python_code = f'from essentia.meta import _extract_metadata; _extract_metadata(\"{PYTHONPATH}/essentia\")' + os.system(f"PYTHONPATH={pythonpath} LD_LIBRARY_PATH={ldpath} {sys.executable} -c '{python_code}'") + print("Algorithms' metadata created") + ctx.install_files('${PYTHONDIR}', ctx.path.ant_glob('essentia/**/*.py'), relative_trick=True) + ctx.install_files('${PYTHONDIR}/essentia', ctx.path.parent.parent.ant_glob(f'{PYTHONPATH}/essentia/*.meta.json'), + relative_trick=False)