From 31763d5e545b3ec4d27fbff01786c0e90328e6aa Mon Sep 17 00:00:00 2001 From: Gabe Goodhart Date: Fri, 22 Jul 2022 13:38:17 -0600 Subject: [PATCH 01/26] LogicOverhaul: First pass at a complete overhaul! This is a bit of a monolithic commit, but there's no real way to decompose it. The main change here is that the tracking is now being done post-import by using `dis` to parse the bytecode of all modules and detect all import statements. This means that the logic can all live in-process rather than needing to spawn subprocesses in order to isolate the imports. It also changes the recursion semantics and removes a bunch of args related to this from the cli and main function signature. Signed-off-by: Gabe Goodhart --- import_tracker/__main__.py | 712 ++----------------------------- import_tracker/import_tracker.py | 441 +++++++++++++++---- 2 files changed, 400 insertions(+), 753 deletions(-) diff --git a/import_tracker/__main__.py b/import_tracker/__main__.py index d9010fd..3643e94 100644 --- a/import_tracker/__main__.py +++ b/import_tracker/__main__.py @@ -15,536 +15,13 @@ """ # Standard -from concurrent.futures import ThreadPoolExecutor -from types import ModuleType -from typing import Dict, List, Optional, Set, Union import argparse -import importlib -import inspect import json import logging import os -import sys -import traceback # Local -from .constants import THIS_PACKAGE, TYPE_DIRECT, TYPE_TRANSITIVE from .import_tracker import track_module -from .lazy_import_errors import enable_tracking_mode -from .lazy_module import LazyModule -from .log import log - -## Implementation Details ###################################################### - - -def _get_dylib_dir(): - """Differnet versions/builds of python manage different builtin libraries as - "builtins" versus extensions. As such, we need some heuristics to try to - find the base directory that holds shared objects from the standard library. - """ - is_dylib = lambda x: x is not None and (x.endswith(".so") or x.endswith(".dylib")) - all_mod_paths = list( - filter(is_dylib, (getattr(mod, "__file__", "") for mod in sys.modules.values())) - ) - # If there's any dylib found, return the parent directory - sample_dylib = None - if all_mod_paths: - sample_dylib = all_mod_paths[0] - else: # pragma: no cover - # If not found with the above, look through libraries that are known to - # sometimes be packaged as compiled extensions - # - # NOTE: This code may be unnecessary, but it is intended to catch future - # cases where the above does not yield results - # - # More names can be added here as needed - for lib_name in ["cmath"]: - lib = importlib.import_module(lib_name) - fname = getattr(lib, "__file__", None) - if is_dylib(fname): - sample_dylib = fname - break - - if sample_dylib is not None: - return os.path.realpath(os.path.dirname(sample_dylib)) - - # If all else fails, we'll just return a sentinel string. 
This will fail to - # match in the below check for builtin modules - return "BADPATH" # pragma: no cover - - -# The path where global modules are found -_std_lib_dir = os.path.realpath(os.path.dirname(os.__file__)) -_std_dylib_dir = _get_dylib_dir() - - -def _get_import_parent_path(mod) -> str: - """Get the parent directory of the given module""" - # Some standard libs have no __file__ attribute - if not hasattr(mod, "__file__"): - return _std_lib_dir - - # If the module comes from an __init__, we need to pop two levels off - file_path = mod.__file__ - if os.path.splitext(os.path.basename(mod.__file__))[0] == "__init__": - file_path = os.path.dirname(file_path) - parent_path = os.path.dirname(file_path) - return parent_path - - -def _get_non_std_modules(mod_names: Union[Set[str], Dict[str, List[dict]]]) -> Set[str]: - """Take a snapshot of the non-standard modules currently imported""" - # Determine the names from the list that are non-standard - non_std_mods = { - mod_name.split(".")[0] - for mod_name, mod in sys.modules.items() - if mod_name in mod_names - and not mod_name.startswith("_") - and "." not in mod_name - and _get_import_parent_path(mod) not in [_std_lib_dir, _std_dylib_dir] - and mod_name.split(".")[0] != THIS_PACKAGE - } - - # If this is a set, just return it directly - if isinstance(mod_names, set): - return non_std_mods - - # If it's a dict, limit to the non standard names - return { - mod_name: mod_vals - for mod_name, mod_vals in mod_names.items() - if mod_name in non_std_mods - } - - -class _DeferredModule(ModuleType): - """A _DeferredModule is a module subclass that wraps another module but imports - it lazily and then aliases __getattr__ to the lazily imported module. - """ - - def __init__(self, name: str): - """Hang onto the import args to use lazily""" - self.__name = name - self.__wrapped_module = None - - def __getattr__(self, name: str) -> any: - """When asked for an attribute, make sure the wrapped module is imported - and then delegate - """ - - if self.__wrapped_module is None: - - # If this is one of the special attributes accessed during the import - # we'll just return None and not trigger the import - if name in [ - "__name__", - "__loader__", - "__package__", - "__path__", - "__file__", - "__cached__", - ]: - log.debug4( - "Not triggering load of [%s] for getattr(%s)", self.name, name - ) - return None - - log.debug1("Triggering lazy import for %s.%s", self.name, name) - self.do_import() - - return getattr(self.__wrapped_module, name) - - @property - def imported(self) -> bool: - """Return whether or not this module has actually imported""" - return self.__wrapped_module is not None - - @property - def name(self) -> str: - """Expose the name of this module""" - return self.__name - - def do_import(self): - """Trigger the import""" - if log.level <= logging.DEBUG4: - for line in traceback.format_stack(): - log.debug4(line.strip()) - - # Remove this module from sys.modules and re-import it - log.debug2("Clearing sys.modules of parents of [%s]", self.name) - self_mod_name_parts = self.name.split(".") - popped_mods = {} - for i in range(1, len(self_mod_name_parts) + 1): - pop_mod_name = ".".join(self_mod_name_parts[:i]) - sys_mod = sys.modules.get(pop_mod_name) - if isinstance(sys_mod, self.__class__) and not sys_mod.imported: - log.debug2("Removing sys.modules[%s]", pop_mod_name) - popped_mods[pop_mod_name] = sys.modules.pop(pop_mod_name) - - log.debug2("Performing deferred import of [%s]", self.name) - self.__wrapped_module = 
importlib.import_module(self.name) - log.debug2("Done with deferred import of [%s]", self.name) - - # Re-decorate the popped mods to fix existing references - for popped_mod_name, popped_mod in popped_mods.items(): - updated_mod = sys.modules.get(popped_mod_name) - assert updated_mod, f"No re-imported version of [{popped_mod_name}] found" - popped_mod.__dict__.update(updated_mod.__dict__) - - def referenced_by(self, module_name: str) -> bool: - """Determine if this deferred module is referenced by the module with - the given name - """ - log.debug4( - "Checking whether [%s] is referenced by [%s]", self.__name, module_name - ) - - # Get the module in question - assert ( - module_name in sys.modules - ), f"Programming error: Ref module not found {module_name}" - ref_module = sys.modules[module_name] - ref_module_pkg = module_name.split(".")[0] - - # Search through the tree of attrs starting at this reference module to - # see if any holds a reference - mods_to_check = [ref_module] - checked_modules = [] - while mods_to_check: - next_mods_to_check = [] - for mod in mods_to_check: - for attr in vars(mod).values(): - if attr is self: - return True - - next_mods_to_check.extend( - [ - attr - for attr in vars(mod).values() - if not isinstance(attr, LazyModule) - and isinstance(attr, ModuleType) - and attr.__name__.startswith(ref_module_pkg) - and mod not in checked_modules - ] - ) - - checked_modules.append(mod) - mods_to_check = next_mods_to_check - - return False - - -class _LazyLoader(importlib.abc.Loader): - """This "loader" can be used with a MetaFinder to catch not-found modules - and raise a ModuleNotFound error lazily when the module is used rather than - at import time. - """ - - def create_module(self, spec): - return _DeferredModule(spec.name) - - def exec_module(self, *_, **__): - """Nothing to do here because the errors will be thrown by the module - created in create_module - """ - - -class ImportTrackerMetaFinder(importlib.abc.MetaPathFinder): - """The ImportTrackerMetaFinder is a meta finder that is intended to be used - at the front of the sys.meta_path to automatically track the imports for a - given library. It does this by deferring all imports which occur before the - target module has been seen, then collecting all imports seen until the - target import has completed. - """ - - def __init__( - self, - tracked_module: str, - side_effect_modules: Optional[List[str]] = None, - track_import_stack: bool = False, - ): - """Initialize with the name of the package being tracked - - Args: - tracked_module: str - The name of the module (may be nested) being tracked - side_effect_modules: Optional[List[str]] - Some libraries rely on certain import-time side effects in order - to perform required import tasks (e.g. global singleton - registries). These modules will be allowed to import regardless - of where they fall relative to the targeted module. - track_import_stack: bool - If true, when imports are allowed through, their stack trace is - captured. - NOTE: This will cause a stack trace to be computed for every - import in the tracked set, so it will be very slow and - should only be used as a debugging tool on targeted imports. 
- """ - self._tracked_module = tracked_module - self._side_effect_modules = side_effect_modules or [] - self._tracked_module_parts = tracked_module.split(".") - self._enabled = True - self._starting_modules = set(sys.modules.keys()) - log.debug2("Starting modules: %s", self._starting_modules) - self._ending_modules = None - self._deferred_modules = set() - self._track_import_stack = track_import_stack - self._import_stacks = {} - - def find_spec( - self, - fullname: str, - *args, - **kwargs, - ) -> Optional[importlib.machinery.ModuleSpec]: - """The find_spec implementation for this finder tracks the source of the - import call for the given module and determines if it is on the critical - path for the target module. - - https://docs.python.org/3/library/importlib.html#importlib.abc.MetaPathFinder.find_spec - - Args: - fullname: str - The fully qualified module name under import - - Returns: - spec: Optional[importlib.machinery.ModuleSpec] - If the desired import is not on the critical path for the target - module, a spec with a _DeferredLoader will be returned. If the - import is on the critical path, None will be returned to defer - to the rest of the "real" finders. - """ - # Do the main tracking logic - result = self._find_spec(fullname, *args, **kwargs) - - # If this module is deferred, return it - if result is not None: - log.debug2("Returning deferred module for [%s]", fullname) - return result - - # If this module is part of the set of modules belonging to the tracked - # module and stack tracing is enabled, grab all frames in the stack that - # come from the tracked module's package. - log.debug2( - "Stack tracking? %s, Ending modules set? %s", - self._track_import_stack, - self._ending_modules is not None, - ) - if ( - self._track_import_stack - and fullname != self._tracked_module - and not self._enabled - ): - stack = inspect.stack() - stack_info = [] - for frame in stack: - frame_module_name = frame.frame.f_globals["__name__"].split(".")[0] - if frame_module_name == self._tracked_module_parts[0]: - stack_info.append( - { - "filename": frame.filename, - "lineno": frame.lineno, - "code_context": [ - line.strip("\n") for line in frame.code_context - ], - } - ) - - # NOTE: Under certain _strange_ cases, you can end up overwriting a - # previous import stack here. I've only ever seen this happen with - # pytest internals. Also, in this case the best we can do is just - # keep the latest one. - log.debug2("Found %d stack frames for [%s]", len(stack_info), fullname) - self._import_stacks[fullname] = stack_info - - # Let the module pass through - return None - - def _find_spec( - self, fullname: str, *args, **kwargs - ) -> Optional[importlib.machinery.ModuleSpec]: - """This implements the core logic of find_spec. It is wrapped by the - public find_spec so that when an import is allowed, the stack can be - optionally tracked. 
- """ - - # If this module fullname is one of the modules with known side-effects, - # let it fall through - if fullname in self._side_effect_modules: - log.debug("Allowing import of side-effect module [%s]", fullname) - return None - - # If this finder is enabled and the requested import is not the target, - # defer it with a lazy module - if ( - self._enabled - and fullname != self._tracked_module - and not self._is_parent_module(fullname) - and fullname not in self._deferred_modules - and fullname.split(".")[0] == self._tracked_module_parts[0] - ): - log.debug3("Deferring import of [%s]", fullname) - self._deferred_modules.add(fullname) - loader = _LazyLoader() - return importlib.util.spec_from_loader(fullname, loader) - - # If this is the target, disable this finder for future imports - if fullname == self._tracked_module: - log.debug( - "Tracked module [%s] found. Tracking started", self._tracked_module - ) - self._enabled = False - - # Remove all lazy modules from sys.modules to force them to be - # reimported - lazy_modules = [ - mod_name - for mod_name, mod in sys.modules.items() - if isinstance(mod, _DeferredModule) - ] - for mod_name in lazy_modules: - log.debug2("Removing lazy module [%s]", mod_name) - del sys.modules[mod_name] - - # Check to see if the tracked module has finished importing and take a - # snapshot of the sys.modules if so - if self._ending_modules is None and not getattr( - getattr(sys.modules.get(self._tracked_module, {}), "__spec__", {}), - "_initializing", - True, - ): - log.debug("Tracked module [%s] finished importing", self._tracked_module) - self._set_ending_modules(fullname) - log.debug2("Ending modules: %s", self._ending_modules) - - # If downstream (inclusive) of the tracked module, let everything import - # cleanly as normal by deferring to the real finders - log.debug3("Allowing import of [%s]", fullname) - return None - - def get_all_new_modules(self) -> Set[str]: - """Get all of the imports that have happened since the start""" - assert self._starting_modules is not None, f"Target module never impoted!" - if self._ending_modules is None: - self._set_ending_modules() - mod_names = { - mod - for mod in self._ending_modules - self._starting_modules - if not self._is_parent_module(mod) - } - if self._track_import_stack: - return { - mod_name: self._import_stacks.get(mod_name, []) - for mod_name in mod_names - } - return mod_names - - ## Implementation Details ## - - def _is_parent_module(self, fullname: str) -> bool: - """Determine if the given module fullname is a direct parent of the - tracked module - """ - parts = fullname.split(".") - return self._tracked_module_parts[: len(parts)] == parts - - def _set_ending_modules(self, trigger_module_name: Optional[str] = None): - """Set the ending module set for the target""" - - # Avoid infinite recursion by setting _ending_modules to a preliminary - # empty set - self._ending_modules = {} - - # Find all attributes on existing modules which are themselves deferred - # modules and trigger their imports. 
This fixes the case where a module - # imports a sibling's attribute which was previously imported and - # deferred - log.debug2("Looking for deferred modules") - deferred_attrs = [] - while True: - for mod_name, mod in list(sys.modules.items()): - log.debug4("Checking module %s for deferred status", mod_name) - if mod_name.startswith(self._tracked_module.split(".")[0]): - for attr_name, attr in vars(mod).items(): - log.debug4("Checking module attr [%s.%s]", mod_name, attr_name) - if isinstance(attr, LazyModule): - log.debug2( - "Skipping lazy module attr [%s.%s]", mod_name, attr_name - ) - continue - if ( - isinstance(attr, _DeferredModule) - and not attr.imported - and attr.referenced_by(self._tracked_module) - ): - log.debug3( - "Found deferred module %s.%s", mod_name, attr_name - ) - deferred_attrs.append((mod_name, attr_name, attr)) - if not deferred_attrs: - break - for mod_name, attr_name, attr in deferred_attrs: - log.debug2("Finalizing deferred import for %s.%s", mod_name, attr_name) - attr.do_import() - log.debug2( - "Done finalizing deferred import for %s.%s", mod_name, attr_name - ) - deferred_attrs = [] - - log.debug2("Done looking for deferred modules") - - # Capture the set of imports in sys.modules (excluding the module that - # triggered this) - self._ending_modules = set(sys.modules.keys()) - {trigger_module_name} - - -def detect_transitive( - downstream_mapping: Dict[str, Union[str, dict]], -) -> Dict[str, dict]: - """For each entry in the downstream mapping, determine if it is a direct - dependency of the module or transitively inherited - """ - updated_mapping = {} - for module_name, dependencies in downstream_mapping.items(): - # Get the module from sys.modules - mod = sys.modules.get(module_name) - assert mod is not None, f"Module [{module_name}] not found in sys.modules" - - # Get a list of all modules that are directly required by looking at its - # full set of attrs - direct_mods = set( - map( - lambda x: x.split(".")[0] if x is not None else None, - [ - attr.__name__ - if isinstance(attr, ModuleType) - else getattr(attr, "__module__", None) - for attr in vars(mod).values() - ], - ) - ) - log.debug3("Direct modules for [%s]: %s", module_name, direct_mods) - - # If it's just a set of strings, make it a dict - if isinstance(dependencies, set): - dependencies = {name: {} for name in dependencies} - - # For each dependency, check whether it's in the direct list - for dep_name, dep_info in dependencies.items(): - # If the dep_info is a list, it's a stack trace - if isinstance(dep_info, list): - dep_info = {"stack": dep_info} - - dep_info["type"] = ( - TYPE_DIRECT if dep_name in direct_mods else TYPE_TRANSITIVE - ) - updated_mapping.setdefault(module_name, {})[dep_name] = dep_info - - # Make sure it's present, even with an empty list - updated_mapping.setdefault(module_name, {}) - - return updated_mapping - ## Main ######################################################################## @@ -554,7 +31,12 @@ def main(): # Set up the args parser = argparse.ArgumentParser(description=__doc__) - parser.add_argument("--name", "-n", help="Module name to import", required=True) + parser.add_argument( + "--name", + "-n", + required=True, + help="Module name to track", + ) parser.add_argument( "--package", "-p", @@ -568,39 +50,12 @@ def main(): help="Indent for json printing", default=None, ) - parser.add_argument( - "--log_level", - "-l", - help="Log level", - default=os.environ.get("LOG_LEVEL", "error"), - ) - parser.add_argument( - "--recursive", - "-r", - action="store_true", 
- help="Recursively perform tracking on all nested modules", - default=False, - ) parser.add_argument( "--submodules", - "-u", - nargs="*", - default=None, - help="List of sub-modules to recurse on (only used when --recursive set)", - ) - parser.add_argument( - "--num_jobs", - "-j", - type=int, - help="Number of workers to spawn when recursing", - default=0, - ) - parser.add_argument( - "--side_effect_modules", "-s", nargs="*", default=None, - help="Modules with known import-time side effect which should always be allowed to import", + help="List of subodules to include (all if no value given)", ) parser.add_argument( "--track_import_stack", @@ -610,21 +65,26 @@ def main(): help="Store the stack trace of imports belonging to the tracked module", ) parser.add_argument( - "--detect_transitive", - "-d", + "--include_transitive", + "-r", action="store_true", default=False, - help="Detect whether each dependency is 'direct' or 'transitive'", + help="Include transitive third-party deps brought in by direct third-party deps", + ) + parser.add_argument( + "--log-level", + "-l", + default=os.environ.get("LOG_LEVEL", "warning"), + help="Default log level", ) args = parser.parse_args() - # Validate sets of args - if args.submodules and not args.recursive: - raise ValueError("Ignoring --submodules without --recursive") - - # Mark the environment as tracking mode so that any lazy import errors are - # disabled - enable_tracking_mode() + # Determine the submodules argument value + submodules = ( + False + if args.submodules is None + else (args.submodules if args.submodules else True) + ) # Set the level on the shared logger log_level = getattr(logging, args.log_level.upper(), None) @@ -632,123 +92,19 @@ def main(): log_level = int(args.log_level) logging.basicConfig(level=log_level) - # Determine the unqualified module name and use it elsewhere - full_module_name = args.name - if args.package is not None: - assert args.name.startswith( - "." - ), "When providing --package, module name must be relative (start with '.')" - full_module_name = f"{args.package}{args.name}" - - # Create the tracking meta finder - tracker_finder = ImportTrackerMetaFinder( - tracked_module=full_module_name, - side_effect_modules=args.side_effect_modules, - track_import_stack=args.track_import_stack, - ) - sys.meta_path = [tracker_finder] + sys.meta_path - - # Do the import - log.debug("Importing %s", full_module_name) - try: - imported = importlib.import_module(full_module_name) - except Exception as err: - log.error("Error on top-level import [%s]: %s", full_module_name, err) - raise - - # Set up the mapping with the external downstreams for the tracked package - downstream_mapping = { - full_module_name: _get_non_std_modules(tracker_finder.get_all_new_modules()) - } - - # If tracking direct vs transitive, update the mapping here - if args.detect_transitive: - log.debug("Tracking direct vs transitive") - downstream_mapping = detect_transitive(downstream_mapping) - - # If recursing, do so now by spawning a subprocess for each internal - # downstream. This must be done in a separate interpreter instance so that - # the imports are cleanly reset for each downstream. 
- if args.recursive: - all_internals = [ - downstream - for downstream in sys.modules.keys() - if downstream.startswith(full_module_name) - and downstream != full_module_name - ] - - # If a list of submodules was given, limit the recursion to only those - # internals found in that list - recursive_internals = all_internals - if args.submodules: - recursive_internals = [ - downstream - for downstream in all_internals - if downstream in args.submodules - ] - log.debug("Recursing on: %s", recursive_internals) - - # Set up the kwargs for recursing - recursive_kwargs = dict( - log_level=log_level, - recursive=False, - side_effect_modules=args.side_effect_modules, - track_import_stack=args.track_import_stack, - detect_transitive=args.detect_transitive, + # Perform the tracking and print out the output + print( + json.dumps( + track_module( + module_name=args.name, + package_name=args.package, + submodules=submodules, + track_import_stack=args.track_import_stack, + include_transitive=args.include_transitive, + ), + indent=args.indent, ) - - # Create the thread pool to manage the subprocesses - if args.num_jobs > 0: - pool = ThreadPoolExecutor(max_workers=args.num_jobs) - futures = [] - for internal_downstream in recursive_internals: - futures.append( - pool.submit( - track_module, - module_name=internal_downstream, - **recursive_kwargs, - ) - ) - - # Wait for all futures to complete and merge into the mapping - for future in futures: - downstream_mapping.update(future.result()) - - else: - for internal_downstream in recursive_internals: - try: - log.debug( - "Starting sub-module tracking for [%s]", internal_downstream - ) - downstream_mapping.update( - track_module( - module_name=internal_downstream, **recursive_kwargs - ) - ) - - # This is useful for catching errors caused by unexpected corner - # cases. If it's triggered, it's a sign of a bug in the library, - # so we don't have ways to explicitly exercise this in tests. 
- except Exception as err: # pragma: no cover - log.error( - "Error while tracking submodule [%s]: %s", - internal_downstream, - err, - ) - raise - - # Get all of the downstreams for the module in question, including internals - log.debug("Downstream Mapping: %s", downstream_mapping) - - # Set up the output dict depending on whether or not the stack info is being - # tracked - output_dict = { - key: dict(sorted(val.items())) if isinstance(val, dict) else sorted(list(val)) - for key, val in downstream_mapping.items() - } - - # Print out the json dump - print(json.dumps(output_dict, indent=args.indent)) + ) if __name__ == "__main__": diff --git a/import_tracker/import_tracker.py b/import_tracker/import_tracker.py index 89ba288..5effeaf 100644 --- a/import_tracker/import_tracker.py +++ b/import_tracker/import_tracker.py @@ -2,15 +2,12 @@ This module implements utilities that enable tracking of third party deps through import statements """ - # Standard -from typing import Dict, List, Optional -import copy -import json -import logging +from types import ModuleType +from typing import Dict, List, Optional, Set, Union +import dis +import importlib import os -import shlex -import subprocess import sys # Local @@ -23,18 +20,11 @@ def track_module( module_name: str, package_name: Optional[str] = None, - log_level: Optional[int] = None, - recursive: bool = False, - num_jobs: int = 0, - side_effect_modules: Optional[List[str]] = None, - submodules: Optional[List[str]] = None, + submodules: Union[List[str], bool] = False, track_import_stack: bool = False, - detect_transitive: bool = False, -) -> Dict[str, List[str]]: - """This function executes the tracking of a single module by launching a - subprocess to execute this module against the target module. The - implementation of thie tracking resides in the __main__ in order to - carefully control the import ecosystem. + include_transitive: bool = False, +) -> dict: + """Track the dependencies of a single python module Args: module_name: str @@ -42,66 +32,367 @@ def track_module( provided) package_name: Optional[str] The parent package name of the module if the module name is relative - log_level: Optional[Union[int, str]] - Log level to pass through to the child process - recursive: bool - Whether or not to recursively track sub-modules within the target - module - num_jobs: int - The number of concurrent jobs to run when recursing - side_effect_modules: Optional[List[str]] - Some libraries rely on certain import-time side effects in order to - perform required import tasks (e.g. global singleton registries). - These modules will be allowed to import regardless of where they - fall relative to the targeted module. - submodules: Optional[List[str]] - List of sub-modules to recurse on (only used when recursive set) + submodules: Union[List[str], bool] + If True, all submodules of the given module will also be tracked. If + given as a list of strings, only those submodules will be tracked. + If False, only the named module will be tracked. track_import_stack: bool - Store the stack trace of imports belonging to the tracked module - detect_transitive: bool - Detect whether each dependency is 'direct' or 'transitive' + Store the stacks of modules causing each dependency of each tracked + module for debugging purposes. + include_transitive: bool + Include transitive dependencies of the third party dependencies that + are direct dependencies of modules within the target module's parent + library. 
Returns: - import_mapping: Dict[str, List[str]] + import_mapping: dict The mapping from fully-qualified module name to the set of imports - needed by the given module + needed by the given module. If tracking import stacks, each + dependency of a given module maps to a list of lists that each + outline one path through imported modules that caused the given + dependency association. """ - # Set up the environment with all sys.paths available (included those added - # in code) - env = dict(copy.deepcopy(os.environ)) - env["PYTHONPATH"] = ":".join(sys.path) - - # Determine the log level - if isinstance(log_level, str): - log_level = getattr(logging, log_level.upper(), None) - if log_level is None: - log_level = os.environ.get("LOG_LEVEL", "error") - - # Set up the command to run this module - cmd = cmd = "{} -W ignore -m {} --name {} --log_level {} --num_jobs {}".format( - sys.executable, - THIS_PACKAGE, - module_name, - log_level, - num_jobs, - ) - if package_name is not None: - cmd += f" --package {package_name}" - if recursive: - cmd += " --recursive" - if side_effect_modules: - cmd += " --side_effect_modules " + " ".join(side_effect_modules) + + # Import the target module + imported = importlib.import_module(module_name, package=package_name) + + # Recursively build the mapping + module_deps_map = dict() + modules_to_check = {imported} + checked_modules = set() + tracked_module_root_pkg = module_name.partition(".")[0] + while modules_to_check: + next_modules_to_check = set() + for module_to_check in modules_to_check: + + # Figure out all direct imports from this module + module_imports = _get_imports(module_to_check) + module_import_names = {mod.__name__ for mod in module_imports} + log.debug3( + "Full import names for [%s]: %s", + module_to_check.__name__, + module_import_names, + ) + + # Trim to just non-standard modules + non_std_module_names = _get_non_std_modules(module_import_names) + log.debug3("Non std module names: %s", non_std_module_names) + non_std_module_imports = [ + mod for mod in module_imports if mod.__name__ in non_std_module_names + ] + + module_deps_map[module_to_check.__name__] = non_std_module_names + log.debug2( + "Deps for [%s] -> %s", + module_to_check.__name__, + non_std_module_names, + ) + + # Add each of these modules to the next round of modules to check if + # it has not yet been checked + next_modules_to_check = next_modules_to_check.union( + { + mod + for mod in non_std_module_imports + if ( + mod not in checked_modules + and ( + include_transitive + or mod.__name__.partition(".")[0] == tracked_module_root_pkg + ) + ) + } + ) + + # Mark this module as checked + checked_modules.add(module_to_check) + + # Set the next iteration + log.debug3("Next modules to check: %s", next_modules_to_check) + modules_to_check = next_modules_to_check + + log.debug3("Full module dep mapping: %s", module_deps_map) + + # Determine all the modules we want the final answer for + output_mods = {module_name} if submodules: - cmd += " --submodules " + " ".join(submodules) - if track_import_stack: - cmd += " --track_import_stack" - if detect_transitive: - cmd += " --detect_transitive" - - # Launch the process - log.debug3("Full Command: %s", cmd) - proc = subprocess.Popen(shlex.split(cmd), stdout=subprocess.PIPE, env=env) - - # Wait for the result and parse it as json - result, _ = proc.communicate() - return json.loads(result) + output_mods = output_mods.union( + { + mod + for mod in module_deps_map + if ( + (submodules is True and mod.startswith(module_name)) + or (submodules is 
not True and mod in submodules) + ) + } + ) + log.debug2("Output modules: %s", output_mods) + + # Flatten each of the output mods' dependency lists + deps_out = {mod: _flatten_deps(mod, module_deps_map) for mod in output_mods} + log.debug("Raw output deps map: %s", deps_out) + if not track_import_stack: + deps_out = {mod: list(deps.keys()) for mod, deps in deps_out.items()} + return deps_out + + +## Private ##################################################################### + + +def _get_dylib_dir(): + """Differnet versions/builds of python manage different builtin libraries as + "builtins" versus extensions. As such, we need some heuristics to try to + find the base directory that holds shared objects from the standard library. + """ + is_dylib = lambda x: x is not None and (x.endswith(".so") or x.endswith(".dylib")) + all_mod_paths = list( + filter(is_dylib, (getattr(mod, "__file__", "") for mod in sys.modules.values())) + ) + # If there's any dylib found, return the parent directory + sample_dylib = None + if all_mod_paths: + sample_dylib = all_mod_paths[0] + else: # pragma: no cover + # If not found with the above, look through libraries that are known to + # sometimes be packaged as compiled extensions + # + # NOTE: This code may be unnecessary, but it is intended to catch future + # cases where the above does not yield results + # + # More names can be added here as needed + for lib_name in ["cmath"]: + lib = importlib.import_module(lib_name) + fname = getattr(lib, "__file__", None) + if is_dylib(fname): + sample_dylib = fname + break + + if sample_dylib is not None: + return os.path.realpath(os.path.dirname(sample_dylib)) + + # If all else fails, we'll just return a sentinel string. This will fail to + # match in the below check for builtin modules + return "BADPATH" # pragma: no cover + + +# The path where global modules are found +_std_lib_dir = os.path.realpath(os.path.dirname(os.__file__)) +_std_dylib_dir = _get_dylib_dir() + + +def _mod_defined_in_init_file(mod: ModuleType) -> bool: + """Determine if the given module is defined in an __init__.py[c]""" + mod_file = getattr(mod, "__file__", None) + if mod_file is None: + return False + return os.path.splitext(os.path.basename(mod_file))[0] == "__init__" + + +def _get_import_parent_path(mod_name: str) -> str: + """Get the parent directory of the given module""" + mod = sys.modules[mod_name] # NOTE: Intentionally unsafe to raise if not there! 
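    # Illustrative: the parent path computed here feeds the standard-library
    # check in _get_non_std_modules, which compares it against _std_lib_dir
    # and _std_dylib_dir. For a stdlib package such as json (defined in
    # json/__init__.py), popping the __init__ level and taking dirname() lands
    # on the same directory as os.__file__, so json is classified as standard;
    # a pip-installed package resolves under site-packages instead and is kept
    # as a third-party dependency.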
+ + # Some standard libs have no __file__ attribute + file_path = getattr(mod, "__file__", None) + if file_path is None: + return _std_lib_dir + + # If the module comes from an __init__, we need to pop two levels off + if _mod_defined_in_init_file(mod): + file_path = os.path.dirname(file_path) + parent_path = os.path.dirname(file_path) + return parent_path + + +def _get_non_std_modules(mod_names: Union[Set[str], Dict[str, List[dict]]]) -> Set[str]: + """Take a snapshot of the non-standard modules currently imported""" + # Determine the names from the list that are non-standard + non_std_mods = { + mod_name + for mod_name in mod_names + if not mod_name.startswith("_") + and ( + mod_name not in sys.modules + or _get_import_parent_path(mod_name) not in [_std_lib_dir, _std_dylib_dir] + ) + and mod_name.partition(".")[0] != THIS_PACKAGE + } + + # If this is a set, just return it directly + if isinstance(mod_names, (set, list)): + return non_std_mods + + # If it's a dict, limit to the non standard names + return { + mod_name: mod_vals + for mod_name, mod_vals in mod_names.items() + if mod_name in non_std_mods + } + + +def _get_value_col(dis_line: str) -> str: + loc = dis_line.find("(") + if loc >= 0: + return dis_line[loc + 1 : -1] + return "" + + +def _figure_out_import( + mod: ModuleType, + dots: Optional[int], + import_name: Optional[str], + import_from: Optional[str], +) -> ModuleType: + log.debug2("Figuring out import [%s/%s/%s]", dots, import_name, import_from) + + # If there are no dots, look for candidate absolute imports + if not dots: + if import_name in sys.modules: + if import_from is not None: + candidate = f"{import_name}.{import_from}" + if candidate in sys.modules: + log.debug3("Found [%s] in sys.modules", candidate) + return sys.modules[candidate] + log.debug3("Found [%s] in sys.modules", import_name) + return sys.modules[import_name] + + # Try simulating a relative import from a non-relative local + dots = dots or 1 + + # If there are dots, figure out the parent + parent_mod_name_parts = mod.__name__.split(".") + if dots > 1: + parent_dots = dots - 1 if _mod_defined_in_init_file(mod) else dots + root_mod_name = ".".join(parent_mod_name_parts[:-parent_dots]) + else: + root_mod_name = mod.__name__ + log.debug3("Parent mod name parts: %s", parent_mod_name_parts) + log.debug3("Num Dots: %d", dots) + log.debug3("Root mod name: %s", root_mod_name) + log.debug3("Module file: %s", getattr(mod, "__file__", None)) + if not import_name: + import_name = root_mod_name + else: + import_name = f"{root_mod_name}.{import_name}" + + # Try with the import_from attached. This might be a module name or a + # non-module attribute, so this might not work + if not import_name: + full_import_candidate = import_from + else: + full_import_candidate = f"{import_name}.{import_from}" + if full_import_candidate in sys.modules: + return sys.modules[full_import_candidate] + + # If that didn't work, the from is an attribute, so just get the import name + return sys.modules.get(import_name) + + +def _get_imports(mod: ModuleType) -> Set[ModuleType]: + """Get the list of import string from a module by parsing the module's + bytecode + """ + log.debug2("Getting imports for %s", mod.__name__) + all_imports = set() + + # Attempt to disassemble the byte code for this module. 
If the module has no + # code, we ignore it since it's most likely a c extension + try: + loader = mod.__loader__ or mod.__spec__.loader + mod_code = loader.get_code(mod.__name__) + except ImportError: + log.debug2("Could not get_code(%s)", mod.__name__) + return all_imports + except AttributeError: + log.warning("Couldn't find a loader for %s!", mod.__name__) + return all_imports + if mod_code is None: + log.debug2("No code object found for %s", mod.__name__) + return all_imports + bcode = dis.Bytecode(mod_code) + + # Parse all bytecode lines + current_dots = None + current_import_name = None + current_import_from = None + open_import = False + log.debug4("Byte Code:") + for line in bcode.dis().split("\n"): + log.debug4(line) + line_val = _get_value_col(line) + if "LOAD_CONST" in line: + if line_val.isnumeric(): + current_dots = int(line_val) + elif "IMPORT_NAME" in line: + open_import = True + if line_val.isnumeric(): + current_dots = int(line_val) + else: + current_import_name = line_val + elif "IMPORT_FROM" in line: + open_import = True + current_import_from = line_val + else: + # This closes an import, so figure out what the module is that is + # being imported! + if open_import: + import_mod = _figure_out_import( + mod, current_dots, current_import_name, current_import_from + ) + if import_mod is not None: + log.debug2("Adding import module [%s]", import_mod.__name__) + all_imports.add(import_mod) + + # If this is a STORE_NAME, subsequent "from" statements may use the + # same dots and name + if "STORE_NAME" not in line: + current_dots = None + current_import_name = None + open_import = False + current_import_from = None + + # If there's an open import at the end, close it + if open_import: + import_mod = _figure_out_import( + mod, current_dots, current_import_name, current_import_from + ) + if import_mod is not None: + log.debug2("Adding import module [%s]", import_mod.__name__) + all_imports.add(import_mod) + + return all_imports + + +def _flatten_deps( + module_name: str, + module_deps_map: Dict[str, List[str]], +) -> Dict[str, List[str]]: + """Flatten the names of all modules that the target module depends on""" + all_deps = {} + mods_to_check = {module_name: []} + while mods_to_check: + next_mods_to_check = {} + for mod_to_check, parent_path in mods_to_check.items(): + mod_path = parent_path + [mod_to_check] + mod_deps = set(module_deps_map.get(mod_to_check, [])) + log.debug4("Mod deps for %s: %s", mod_to_check, mod_deps) + new_mods = mod_deps - set(all_deps.keys()) + next_mods_to_check.update({new_mod: mod_path for new_mod in new_mods}) + for mod_dep in mod_deps: + all_deps.setdefault(mod_dep, []).append(mod_path) + log.debug3("Next mods to check: %s", next_mods_to_check) + mods_to_check = next_mods_to_check + mod_base_name = module_name.partition(".")[0] + flat_base_deps = {} + for dep, dep_sources in all_deps.items(): + if not dep.startswith(mod_base_name): + # Truncate the dep_sources entries and trim to avoid duplicates + dep_root_mod_name = dep.partition(".")[0] + flat_dep_sources = flat_base_deps.setdefault(dep_root_mod_name, []) + for dep_source in dep_sources: + flat_dep_source = dep_source + if dep_root_mod_name in dep_source: + flat_dep_source = dep_source[: dep_source.index(dep_root_mod_name)] + if flat_dep_source not in flat_dep_sources: + flat_dep_sources.append(flat_dep_source) + return flat_base_deps From ccaacbf8757a33a76a26e5f1a610b6b5f4efbe13 Mon Sep 17 00:00:00 2001 From: Gabe Goodhart Date: Fri, 22 Jul 2022 20:44:42 -0600 Subject: [PATCH 02/26] 
LogicOverhaul: Add detect_transitive back in Signed-off-by: Gabe Goodhart --- import_tracker/__main__.py | 16 +++++-- import_tracker/import_tracker.py | 72 +++++++++++++++++++++++++++----- 2 files changed, 73 insertions(+), 15 deletions(-) diff --git a/import_tracker/__main__.py b/import_tracker/__main__.py index 3643e94..f8db879 100644 --- a/import_tracker/__main__.py +++ b/import_tracker/__main__.py @@ -65,14 +65,21 @@ def main(): help="Store the stack trace of imports belonging to the tracked module", ) parser.add_argument( - "--include_transitive", - "-r", + "--full_depth", + "-f", action="store_true", default=False, help="Include transitive third-party deps brought in by direct third-party deps", ) parser.add_argument( - "--log-level", + "--detect_transitive", + "-d", + action="store_true", + default=False, + help="Detect whether each dependency is 'direct' or 'transitive'", + ) + parser.add_argument( + "--log_level", "-l", default=os.environ.get("LOG_LEVEL", "warning"), help="Default log level", @@ -100,7 +107,8 @@ def main(): package_name=args.package, submodules=submodules, track_import_stack=args.track_import_stack, - include_transitive=args.include_transitive, + full_depth=args.full_depth, + detect_transitive=args.detect_transitive, ), indent=args.indent, ) diff --git a/import_tracker/import_tracker.py b/import_tracker/import_tracker.py index 5effeaf..b695988 100644 --- a/import_tracker/import_tracker.py +++ b/import_tracker/import_tracker.py @@ -11,7 +11,7 @@ import sys # Local -from .constants import THIS_PACKAGE +from .constants import THIS_PACKAGE, TYPE_DIRECT, TYPE_TRANSITIVE from .log import log ## Public ###################################################################### @@ -22,7 +22,8 @@ def track_module( package_name: Optional[str] = None, submodules: Union[List[str], bool] = False, track_import_stack: bool = False, - include_transitive: bool = False, + full_depth: bool = False, + detect_transitive: bool = False, ) -> dict: """Track the dependencies of a single python module @@ -39,10 +40,12 @@ def track_module( track_import_stack: bool Store the stacks of modules causing each dependency of each tracked module for debugging purposes. - include_transitive: bool + full_depth: bool Include transitive dependencies of the third party dependencies that are direct dependencies of modules within the target module's parent library. 
+ detect_transitive: bool + Detect whether each dependency is 'direct' or 'transitive' Returns: import_mapping: dict @@ -55,12 +58,13 @@ def track_module( # Import the target module imported = importlib.import_module(module_name, package=package_name) + full_module_name = imported.__name__ # Recursively build the mapping module_deps_map = dict() modules_to_check = {imported} checked_modules = set() - tracked_module_root_pkg = module_name.partition(".")[0] + tracked_module_root_pkg = full_module_name.partition(".")[0] while modules_to_check: next_modules_to_check = set() for module_to_check in modules_to_check: @@ -97,13 +101,33 @@ def track_module( if ( mod not in checked_modules and ( - include_transitive + full_depth or mod.__name__.partition(".")[0] == tracked_module_root_pkg ) ) } ) + # Also check modules with intermediate names + parent_mods = set() + for mod in next_modules_to_check: + mod_name_parts = mod.__name__.split(".") + for parent_mod_name in [ + ".".join(mod_name_parts[: i + 1]) + for i in range(len(mod_name_parts)) + ]: + parent_mod = sys.modules.get(parent_mod_name) + if parent_mod is None: + log.warning( + "Could not find parent module %s of %s", + parent_mod_name, + mod.__name__, + ) + continue + if parent_mod not in checked_modules: + parent_mods.add(parent_mod) + next_modules_to_check = next_modules_to_check.union(parent_mods) + # Mark this module as checked checked_modules.add(module_to_check) @@ -114,14 +138,14 @@ def track_module( log.debug3("Full module dep mapping: %s", module_deps_map) # Determine all the modules we want the final answer for - output_mods = {module_name} + output_mods = {full_module_name} if submodules: output_mods = output_mods.union( { mod for mod in module_deps_map if ( - (submodules is True and mod.startswith(module_name)) + (submodules is True and mod.startswith(full_module_name)) or (submodules is not True and mod in submodules) ) } @@ -129,10 +153,36 @@ def track_module( log.debug2("Output modules: %s", output_mods) # Flatten each of the output mods' dependency lists - deps_out = {mod: _flatten_deps(mod, module_deps_map) for mod in output_mods} - log.debug("Raw output deps map: %s", deps_out) - if not track_import_stack: - deps_out = {mod: list(deps.keys()) for mod, deps in deps_out.items()} + flattened_deps = {mod: _flatten_deps(mod, module_deps_map) for mod in output_mods} + log.debug("Raw output deps map: %s", flattened_deps) + + # If detecting transitive deps, look through the stacks and mark each dep as + # transitive or direct + if not detect_transitive and not track_import_stack: + deps_out = { + mod: list(sorted(deps.keys())) for mod, deps in flattened_deps.items() + } + else: + deps_out = {} + + if detect_transitive: + for mod, deps in flattened_deps.items(): + for dep_name, dep_stacks in deps.items(): + deps_out.setdefault(mod, {}).setdefault(dep_name, {})["type"] = ( + TYPE_DIRECT + if any(len(dep_stack) == 1 for dep_stack in dep_stacks) + else TYPE_TRANSITIVE + ) + + # If tracking import stacks, move them to the "stack" key in the output + if track_import_stack: + for mod, deps in flattened_deps.items(): + for dep_name, dep_stacks in deps.items(): + deps_out.setdefault(mod, {}).setdefault(dep_name, {})[ + "stack" + ] = dep_stacks + + log.debug("Final output: %s", deps_out) return deps_out From 4b04d99dd63fbc59aa40710a10050840d4d8dba4 Mon Sep 17 00:00:00 2001 From: Gabe Goodhart Date: Fri, 22 Jul 2022 20:45:46 -0600 Subject: [PATCH 03/26] LogicOverhaul: Tweaks to setup_tools to make the logic and tests work There were a few 
tests in here that really seem like they were broken? Signed-off-by: Gabe Goodhart --- import_tracker/setup_tools.py | 10 +++++++--- test/sample_libs/direct_dep_nested/nested.py | 3 +++ test/test_setup_tools.py | 7 +++---- 3 files changed, 13 insertions(+), 7 deletions(-) diff --git a/import_tracker/setup_tools.py b/import_tracker/setup_tools.py index 1d7fc2a..74ab7e1 100644 --- a/import_tracker/setup_tools.py +++ b/import_tracker/setup_tools.py @@ -11,7 +11,7 @@ import sys # Local -from .constants import THIS_PACKAGE, TYPE_DIRECT +from .constants import TYPE_DIRECT from .import_tracker import track_module from .log import log @@ -66,7 +66,7 @@ def parse_requirements( # Get the set of required modules for each of the listed extras modules library_import_mapping = track_module( library_name, - recursive=True, + submodules=True, detect_transitive=True, **kwargs, ) @@ -124,6 +124,10 @@ def parse_requirements( if parent is None: non_extra_union = non_extra_union.union(import_set) break + common_intersection = common_intersection or set() + if len(extras_modules) == 1: + common_intersection = set() + log.debug3("Raw common intersection: %s", common_intersection) # Add direct dependencies of all parent modules to the non_extra_union for module_name, imports_info in library_import_mapping.items(): @@ -177,7 +181,7 @@ def parse_requirements( standardized_requirements = { key.replace("-", "_"): val for key, val in requirements.items() } - return _map_requirements(standardized_requirements, common_imports), { + return sorted(_map_requirements(standardized_requirements, common_imports)), { set_name: _map_requirements(standardized_requirements, import_set) for set_name, import_set in extras_require_sets.items() } diff --git a/test/sample_libs/direct_dep_nested/nested.py b/test/sample_libs/direct_dep_nested/nested.py index e83a547..d859a99 100644 --- a/test/sample_libs/direct_dep_nested/nested.py +++ b/test/sample_libs/direct_dep_nested/nested.py @@ -1,2 +1,5 @@ +# Third Party +import yaml + # Local from sample_lib import submod1 diff --git a/test/test_setup_tools.py b/test/test_setup_tools.py index e33b9d0..e245a78 100644 --- a/test/test_setup_tools.py +++ b/test/test_setup_tools.py @@ -133,7 +133,6 @@ def test_parse_requirements_with_side_effects(): parse_requirements( requirements=sample_lib_requirements, library_name="side_effects", - side_effect_modules=["side_effects.global_thing"], ) @@ -156,7 +155,7 @@ def test_single_extras_module(): "single_extra", ["single_extra.extra"], ) - assert requirements == ["alchemy-logging"] + assert requirements == sorted(["alchemy-logging"]) assert extras_require == { "all": sorted(["alchemy-logging", "PyYaml"]), "single_extra.extra": ["PyYaml"], @@ -188,9 +187,9 @@ def test_nested_deps(): "direct_dep_nested", ["direct_dep_nested.nested", "direct_dep_nested.nested2"], ) - assert requirements == sorted(["alchemy-logging", "sample_lib"]) + assert requirements == sorted(["sample_lib"]) assert extras_require == { "all": sorted(["sample_lib", "PyYaml", "alchemy-logging"]), "direct_dep_nested.nested": sorted(["PyYaml"]), - "direct_dep_nested.nested2": sorted([]), + "direct_dep_nested.nested2": sorted(["alchemy-logging"]), } From 1c205d31bb79310853441cdd2e9a2ef07662fc79 Mon Sep 17 00:00:00 2001 From: Gabe Goodhart Date: Fri, 22 Jul 2022 20:46:10 -0600 Subject: [PATCH 04/26] LogicOverhaul: Get test_import_tracker all passing Signed-off-by: Gabe Goodhart --- test/test_import_tracker.py | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git 
a/test/test_import_tracker.py b/test/test_import_tracker.py index 6d5e41d..3e98ef4 100644 --- a/test/test_import_tracker.py +++ b/test/test_import_tracker.py @@ -61,9 +61,7 @@ def test_track_module_recursive(): NOTE: The num_jobs simply exercises that code as there's no real way to validate the parallelism """ - sample_lib_mapping = import_tracker.track_module( - "sample_lib", recursive=True, num_jobs=2 - ) + sample_lib_mapping = import_tracker.track_module("sample_lib", submodules=True) assert sample_lib_mapping == { "sample_lib": sorted(["conditional_deps", "alog", "yaml"]), "sample_lib.submod1": ["conditional_deps"], @@ -73,19 +71,10 @@ def test_track_module_recursive(): } -def test_track_module_with_log_level(): - """Test that a log level can be given to track_module""" - sample_lib_mapping = import_tracker.track_module( - "sample_lib.submod1", log_level="error" - ) - assert sample_lib_mapping == {"sample_lib.submod1": ["conditional_deps"]} - - def test_track_module_with_limited_submodules(): """Test that the submodules arg can be passed through""" sample_lib_mapping = import_tracker.track_module( "sample_lib", - recursive=True, submodules=["sample_lib.submod1"], ) assert sample_lib_mapping == { From d166030f49a8cce5afa0907d8709fd3b63d6a18c Mon Sep 17 00:00:00 2001 From: Gabe Goodhart Date: Sat, 23 Jul 2022 14:59:20 -0600 Subject: [PATCH 05/26] LogicOverhaul: Handle single-dot when not defined in __init__ Signed-off-by: Gabe Goodhart --- import_tracker/import_tracker.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/import_tracker/import_tracker.py b/import_tracker/import_tracker.py index b695988..f4a3502 100644 --- a/import_tracker/import_tracker.py +++ b/import_tracker/import_tracker.py @@ -57,6 +57,7 @@ def track_module( """ # Import the target module + log.debug("Importing %s.%s", package_name, module_name) imported = importlib.import_module(module_name, package=package_name) full_module_name = imported.__name__ @@ -311,11 +312,14 @@ def _figure_out_import( # If there are dots, figure out the parent parent_mod_name_parts = mod.__name__.split(".") + defined_in_init = _mod_defined_in_init_file(mod) if dots > 1: - parent_dots = dots - 1 if _mod_defined_in_init_file(mod) else dots + parent_dots = dots - 1 if defined_in_init else dots root_mod_name = ".".join(parent_mod_name_parts[:-parent_dots]) - else: + elif defined_in_init: root_mod_name = mod.__name__ + else: + root_mod_name = ".".join(parent_mod_name_parts[:-1]) log.debug3("Parent mod name parts: %s", parent_mod_name_parts) log.debug3("Num Dots: %d", dots) log.debug3("Root mod name: %s", root_mod_name) From cf2db4807b55c20cfd8999c89fea7a21834c5c10 Mon Sep 17 00:00:00 2001 From: Gabe Goodhart Date: Sat, 23 Jul 2022 15:10:04 -0600 Subject: [PATCH 06/26] LogicOverhaul: Use conftest for autouse fixtures Signed-off-by: Gabe Goodhart --- test/conftest.py | 43 +++++++++++++++++++++++++++++++ test/helpers.py | 45 --------------------------------- test/test_import_tracker.py | 1 - test/test_lazy_import_errors.py | 1 - test/test_lazy_module.py | 1 - test/test_main.py | 2 -- test/test_setup_tools.py | 1 - 7 files changed, 43 insertions(+), 51 deletions(-) create mode 100644 test/conftest.py diff --git a/test/conftest.py b/test/conftest.py new file mode 100644 index 0000000..42b417a --- /dev/null +++ b/test/conftest.py @@ -0,0 +1,43 @@ +""" +Global autouse fixtures that will be used by all tests +""" + +# Standard +import logging +import os +import sys + +# Third Party +import pytest + +# Local +from 
import_tracker.log import log + + +@pytest.fixture(autouse=True) +def configure_logging(): + """Fixture that configures logging from the env. It is auto-used, so if + imported, it will automatically configure for each test. + + NOTE: The import of alog is inside the function since alog is used as a + sample package for lazy importing in some tests + """ + logging.basicConfig() + log.root.setLevel(getattr(logging, os.environ.get("LOG_LEVEL", "warning").upper())) + + +@pytest.fixture(autouse=True) +def reset_sys_modules(): + """This fixture will reset the sys.modules dict to only the keys held before + the test initialized + """ + before_keys = list(sys.modules.keys()) + yield + added_keys = [ + module_name + for module_name in sys.modules.keys() + if module_name not in before_keys + ] + for added_key in added_keys: + mod = sys.modules.pop(added_key) + del mod diff --git a/test/helpers.py b/test/helpers.py index 03e3721..fd47c26 100644 --- a/test/helpers.py +++ b/test/helpers.py @@ -2,51 +2,6 @@ Shared test helpers """ -# Standard -from contextlib import contextmanager -import os -import sys - -# Third Party -import pytest - -# Local -import import_tracker - - -@pytest.fixture(autouse=True) -def configure_logging(): - """Fixture that configures logging from the env. It is auto-used, so if - imported, it will automatically configure for each test. - - NOTE: The import of alog is inside the function since alog is used as a - sample package for lazy importing in some tests - """ - # First Party - import alog - - alog.configure( - default_level=os.environ.get("LOG_LEVEL", "info"), - filters=os.environ.get("LOG_FILTERS", ""), - ) - - -@pytest.fixture(autouse=True) -def reset_sys_modules(): - """This fixture will reset the sys.modules dict to only the keys held before - the test initialized - """ - before_keys = list(sys.modules.keys()) - yield - added_keys = [ - module_name - for module_name in sys.modules.keys() - if module_name not in before_keys - ] - for added_key in added_keys: - mod = sys.modules.pop(added_key) - del mod - def remove_test_deps(deps): """If running with pytest coverage enabled, these deps will show up. 
We diff --git a/test/test_import_tracker.py b/test/test_import_tracker.py index 3e98ef4..e01fc96 100644 --- a/test/test_import_tracker.py +++ b/test/test_import_tracker.py @@ -14,7 +14,6 @@ import pytest # Local -from test.helpers import remove_test_deps, reset_sys_modules import import_tracker ## Package API ################################################################# diff --git a/test/test_lazy_import_errors.py b/test/test_lazy_import_errors.py index f53df11..6b73f03 100644 --- a/test/test_lazy_import_errors.py +++ b/test/test_lazy_import_errors.py @@ -15,7 +15,6 @@ # Local from import_tracker.lazy_import_errors import _LazyErrorMetaFinder -from test.helpers import reset_sys_modules import import_tracker diff --git a/test/test_lazy_module.py b/test/test_lazy_module.py index a72a604..af7bcf1 100644 --- a/test/test_lazy_module.py +++ b/test/test_lazy_module.py @@ -10,7 +10,6 @@ # Local from import_tracker.lazy_module import LazyModule -from test.helpers import reset_sys_modules def test_lazy_module_valid_import(): diff --git a/test/test_main.py b/test/test_main.py index 44eb65b..dc86459 100644 --- a/test/test_main.py +++ b/test/test_main.py @@ -13,10 +13,8 @@ import pytest # Local -from .helpers import reset_sys_modules from import_tracker import constants from import_tracker.__main__ import main -import import_tracker ## Helpers ##################################################################### diff --git a/test/test_setup_tools.py b/test/test_setup_tools.py index e245a78..7d5ecc4 100644 --- a/test/test_setup_tools.py +++ b/test/test_setup_tools.py @@ -10,7 +10,6 @@ import pytest # Local -from .helpers import configure_logging from import_tracker.setup_tools import parse_requirements sample_lib_requirements = [ From 9794a2a92e33ce8a855e3f9eda0a7967478368de Mon Sep 17 00:00:00 2001 From: Gabe Goodhart Date: Sat, 23 Jul 2022 15:59:30 -0600 Subject: [PATCH 07/26] LogicOverhaul: Handle parent direct deps in track_module Signed-off-by: Gabe Goodhart --- import_tracker/import_tracker.py | 65 +++++++++++++++++++++++++++++++- import_tracker/setup_tools.py | 10 ----- test/test_import_tracker.py | 31 +++++++++++++++ 3 files changed, 94 insertions(+), 12 deletions(-) diff --git a/import_tracker/import_tracker.py b/import_tracker/import_tracker.py index f4a3502..8bcae87 100644 --- a/import_tracker/import_tracker.py +++ b/import_tracker/import_tracker.py @@ -153,8 +153,14 @@ def track_module( ) log.debug2("Output modules: %s", output_mods) + # Add parent direct deps to the module deps map + parent_direct_deps = _add_parent_direct_deps(module_deps_map) + # Flatten each of the output mods' dependency lists - flattened_deps = {mod: _flatten_deps(mod, module_deps_map) for mod in output_mods} + flattened_deps = { + mod: _flatten_deps(mod, module_deps_map, parent_direct_deps) + for mod in output_mods + } log.debug("Raw output deps map: %s", flattened_deps) # If detecting transitive deps, look through the stacks and mark each dep as @@ -417,25 +423,80 @@ def _get_imports(mod: ModuleType) -> Set[ModuleType]: return all_imports +def _add_parent_direct_deps( + module_deps_map: Dict[str, List[str]] +) -> Dict[str, Dict[str, List[str]]]: + """Augment the dependencies of each module in the module deps map with any + third-party dependencies that are directly imported by parent modules. 
+ """ + + parent_direct_deps = {} + for mod_name, mod_deps in module_deps_map.items(): + + # Look through all parent modules of module_name and aggregate all + # third-party deps that are directly used by those modules + mod_base_name = mod_name.partition(".")[0] + mod_name_parts = mod_name.split(".") + for i in range(1, len(mod_name_parts)): + parent_mod_name = ".".join(mod_name_parts[:i]) + parent_deps = module_deps_map.get(parent_mod_name, {}) + for dep in parent_deps: + if not dep.startswith(mod_base_name) and dep not in mod_deps: + log.debug3( + "Adding direct-dependency of parent mod [%s]: %s", + parent_mod_name, + dep, + ) + mod_deps.add(dep) + parent_direct_deps.setdefault(mod_name, {}).setdefault( + parent_mod_name, set() + ).add(dep) + log.debug3("Parent direct dep map: %s", parent_direct_deps) + return parent_direct_deps + + def _flatten_deps( module_name: str, module_deps_map: Dict[str, List[str]], + parent_direct_deps: Dict[str, Dict[str, List[str]]], ) -> Dict[str, List[str]]: """Flatten the names of all modules that the target module depends on""" + + # Look through all modules that are directly required by this target module. + # This only looks at the leaves, so if the module depends on foo.bar.baz, + # only the deps for foo.bar.baz will be incluced and not foo.bar.buz or + # foo.biz. all_deps = {} mods_to_check = {module_name: []} while mods_to_check: next_mods_to_check = {} for mod_to_check, parent_path in mods_to_check.items(): + mod_parents_direct_deps = parent_direct_deps.get(mod_to_check, {}) mod_path = parent_path + [mod_to_check] mod_deps = set(module_deps_map.get(mod_to_check, [])) log.debug4("Mod deps for %s: %s", mod_to_check, mod_deps) new_mods = mod_deps - set(all_deps.keys()) next_mods_to_check.update({new_mod: mod_path for new_mod in new_mods}) for mod_dep in mod_deps: - all_deps.setdefault(mod_dep, []).append(mod_path) + # If this is a parent direct dep, add the parent to the path + mod_dep_direct_parents = [] + for ( + mod_parent, + mod_parent_direct_deps, + ) in mod_parents_direct_deps.items(): + if mod_dep in mod_parent_direct_deps: + mod_dep_direct_parents.append(mod_parent) + if mod_dep_direct_parents: + for mod_dep_direct_parent in mod_dep_direct_parents: + all_deps.setdefault(mod_dep, []).append( + [mod_dep_direct_parent] + mod_path + ) + else: + all_deps.setdefault(mod_dep, []).append(mod_path) log.debug3("Next mods to check: %s", next_mods_to_check) mods_to_check = next_mods_to_check + + # Create the flattened dependencies with the source lists for each mod_base_name = module_name.partition(".")[0] flat_base_deps = {} for dep, dep_sources in all_deps.items(): diff --git a/import_tracker/setup_tools.py b/import_tracker/setup_tools.py index 74ab7e1..d90e677 100644 --- a/import_tracker/setup_tools.py +++ b/import_tracker/setup_tools.py @@ -129,16 +129,6 @@ def parse_requirements( common_intersection = set() log.debug3("Raw common intersection: %s", common_intersection) - # Add direct dependencies of all parent modules to the non_extra_union - for module_name, imports_info in library_import_mapping.items(): - if module_name not in extras_modules: - direct_deps = { - requirement_name_map[dep_name] - for dep_name, dep_info in imports_info.items() - if dep_info.get("type") == TYPE_DIRECT - } - non_extra_union = non_extra_union.union(direct_deps) - common_imports = common_intersection.union(non_extra_union) log.debug3("Common intersection: %s", common_intersection) log.debug3("Non extra union: %s", non_extra_union) diff --git a/test/test_import_tracker.py 
b/test/test_import_tracker.py index e01fc96..7b88b64 100644 --- a/test/test_import_tracker.py +++ b/test/test_import_tracker.py @@ -80,3 +80,34 @@ def test_track_module_with_limited_submodules(): "sample_lib": sorted(["conditional_deps", "alog", "yaml"]), "sample_lib.submod1": ["conditional_deps"], } + + +def test_sibling_import(): + """Make sure that a library with a submodule that imports a sibling + submodule properly tracks dependencies through the sibling + """ + lib_mapping = import_tracker.track_module( + "inter_mod_deps", + submodules=True, + ) + assert (set(lib_mapping["inter_mod_deps.submod1"])) == {"alog"} + assert (set(lib_mapping["inter_mod_deps.submod2"])) == { + "alog", + "yaml", + } + assert (set(lib_mapping["inter_mod_deps.submod2.foo"])) == { + "yaml", + } + assert (set(lib_mapping["inter_mod_deps.submod2.bar"])) == { + "yaml", + } + assert (set(lib_mapping["inter_mod_deps.submod3"])) == { + "alog", + "yaml", + } + assert (set(lib_mapping["inter_mod_deps.submod4"])) == { + "yaml", + } + assert (set(lib_mapping["inter_mod_deps.submod5"])) == { + "yaml", + } From 765d11edebeeace244571d6da8b443c244ef1f46 Mon Sep 17 00:00:00 2001 From: Gabe Goodhart Date: Mon, 25 Jul 2022 12:08:07 -0600 Subject: [PATCH 08/26] LogicOverhaul: Fix how parent direct deps show up in the source stacks to avoid duplication Signed-off-by: Gabe Goodhart --- import_tracker/import_tracker.py | 35 +++++++++++++---- test/test_import_tracker.py | 37 ++++++++++++++++++ test/test_main.py | 64 -------------------------------- 3 files changed, 64 insertions(+), 72 deletions(-) diff --git a/import_tracker/import_tracker.py b/import_tracker/import_tracker.py index 8bcae87..3f8de06 100644 --- a/import_tracker/import_tracker.py +++ b/import_tracker/import_tracker.py @@ -471,30 +471,48 @@ def _flatten_deps( while mods_to_check: next_mods_to_check = {} for mod_to_check, parent_path in mods_to_check.items(): + log.debug4("Checking mod %s", mod_to_check) mod_parents_direct_deps = parent_direct_deps.get(mod_to_check, {}) mod_path = parent_path + [mod_to_check] mod_deps = set(module_deps_map.get(mod_to_check, [])) - log.debug4("Mod deps for %s: %s", mod_to_check, mod_deps) + log.debug4( + "Mod deps for %s at path %s: %s", mod_to_check, mod_path, mod_deps + ) new_mods = mod_deps - set(all_deps.keys()) next_mods_to_check.update({new_mod: mod_path for new_mod in new_mods}) for mod_dep in mod_deps: - # If this is a parent direct dep, add the parent to the path - mod_dep_direct_parents = [] + # If this is a parent direct dep, and the stack for this parent + # is not already present in the dep stacks for this dependency, + # add the parent to the path + mod_dep_direct_parents = {} for ( mod_parent, mod_parent_direct_deps, ) in mod_parents_direct_deps.items(): if mod_dep in mod_parent_direct_deps: - mod_dep_direct_parents.append(mod_parent) - if mod_dep_direct_parents: - for mod_dep_direct_parent in mod_dep_direct_parents: - all_deps.setdefault(mod_dep, []).append( - [mod_dep_direct_parent] + mod_path + log.debug4( + "Found direct parent dep for [%s] from parent [%s] and dep [%s]", + mod_to_check, + mod_parent, + mod_dep, ) + mod_dep_direct_parents[mod_parent] = [ + mod_parent + ] in all_deps.get(mod_dep, []) + if mod_dep_direct_parents: + for ( + mod_dep_direct_parent, + already_present, + ) in mod_dep_direct_parents.items(): + if not already_present: + all_deps.setdefault(mod_dep, []).append( + [mod_dep_direct_parent] + mod_path + ) else: all_deps.setdefault(mod_dep, []).append(mod_path) log.debug3("Next mods to check: %s", 
next_mods_to_check) mods_to_check = next_mods_to_check + log.debug4("All deps: %s", all_deps) # Create the flattened dependencies with the source lists for each mod_base_name = module_name.partition(".")[0] @@ -505,6 +523,7 @@ def _flatten_deps( dep_root_mod_name = dep.partition(".")[0] flat_dep_sources = flat_base_deps.setdefault(dep_root_mod_name, []) for dep_source in dep_sources: + log.debug4("Considering dep source list for %s: %s", dep, dep_source) flat_dep_source = dep_source if dep_root_mod_name in dep_source: flat_dep_source = dep_source[: dep_source.index(dep_root_mod_name)] diff --git a/test/test_import_tracker.py b/test/test_import_tracker.py index 7b88b64..bcc8e37 100644 --- a/test/test_import_tracker.py +++ b/test/test_import_tracker.py @@ -111,3 +111,40 @@ def test_sibling_import(): assert (set(lib_mapping["inter_mod_deps.submod5"])) == { "yaml", } + + +def test_import_stack_tracking(): + """Make sure that tracking the import stack works as expected""" + lib_mapping = import_tracker.track_module( + "inter_mod_deps", + submodules=True, + track_import_stack=True, + ) + + assert set(lib_mapping.keys()) == { + "inter_mod_deps", + "inter_mod_deps.submod1", + "inter_mod_deps.submod2", + "inter_mod_deps.submod2.foo", + "inter_mod_deps.submod2.bar", + "inter_mod_deps.submod3", + "inter_mod_deps.submod4", + "inter_mod_deps.submod5", + } + + # Check one of the stacks to make sure it's correct + assert lib_mapping["inter_mod_deps.submod2"] == { + "alog": { + "stack": [ + [ + "inter_mod_deps.submod2", + "inter_mod_deps.submod1", + ] + ] + }, + "yaml": { + "stack": [ + ["inter_mod_deps.submod2"], + ] + }, + } diff --git a/test/test_main.py b/test/test_main.py index dc86459..a1212e9 100644 --- a/test/test_main.py +++ b/test/test_main.py @@ -200,70 +200,6 @@ def test_error_submodules_without_recursive(): main() -def test_import_stack_tracking(capsys): - """Make sure that tracking the import stack works as expected""" - with cli_args( - "--name", - "inter_mod_deps", - "--recursive", - "--track_import_stack", - ): - main() - captured = capsys.readouterr() - assert captured.out - parsed_out = json.loads(captured.out) - - assert set(parsed_out.keys()) == { - "inter_mod_deps", - "inter_mod_deps.submod1", - "inter_mod_deps.submod2", - "inter_mod_deps.submod2.foo", - "inter_mod_deps.submod2.bar", - "inter_mod_deps.submod3", - "inter_mod_deps.submod4", - "inter_mod_deps.submod5", - } - - # Check one of the stacks to make sure it's correct - test_lib_dir = os.path.realpath( - os.path.join( - os.path.dirname(__file__), - "sample_libs", - "inter_mod_deps", - ) - ) - assert parsed_out["inter_mod_deps.submod2"] == { - "alog": [ - { - "filename": f"{test_lib_dir}/submod1/__init__.py", - "lineno": 6, - "code_context": ["import alog"], - }, - { - "filename": f"{test_lib_dir}/__init__.py", - "lineno": 17, - "code_context": [ - "from . import submod1, submod2, submod3, submod4, submod5" - ], - }, - ], - "yaml": [ - { - "filename": f"{test_lib_dir}/submod2/__init__.py", - "lineno": 6, - "code_context": ["import yaml"], - }, - { - "filename": f"{test_lib_dir}/__init__.py", - "lineno": 17, - "code_context": [ - "from . 
import submod1, submod2, submod3, submod4, submod5" - ], - }, - ], - } - - def test_detect_transitive_no_stack_traces(capsys): """Test that --detect_transitive works as expected""" with cli_args( From 76bee666668c17954ca06381e1fa610e55f004f2 Mon Sep 17 00:00:00 2001 From: Gabe Goodhart Date: Mon, 25 Jul 2022 13:48:14 -0600 Subject: [PATCH 09/26] LogicOverhaul: Comment fix in conftest Signed-off-by: Gabe Goodhart --- test/conftest.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/test/conftest.py b/test/conftest.py index 42b417a..3fbccbe 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -18,9 +18,6 @@ def configure_logging(): """Fixture that configures logging from the env. It is auto-used, so if imported, it will automatically configure for each test. - - NOTE: The import of alog is inside the function since alog is used as a - sample package for lazy importing in some tests """ logging.basicConfig() log.root.setLevel(getattr(logging, os.environ.get("LOG_LEVEL", "warning").upper())) From d626b334c4e4f356dc2c0faa59314a1d1f2a3ab9 Mon Sep 17 00:00:00 2001 From: Gabe Goodhart Date: Mon, 25 Jul 2022 13:49:05 -0600 Subject: [PATCH 10/26] LogicOverhaul: Fix cli args and remove invalid tests from test_main There are still several failing tests here that need to be updated Signed-off-by: Gabe Goodhart --- test/test_main.py | 39 ++++----------------------------------- 1 file changed, 4 insertions(+), 35 deletions(-) diff --git a/test/test_main.py b/test/test_main.py index a1212e9..13dce28 100644 --- a/test/test_main.py +++ b/test/test_main.py @@ -100,7 +100,7 @@ def test_sibling_import(capsys): """Make sure that a library with a submodule that imports a sibling submodule properly tracks dependencies through the sibling """ - with cli_args("--name", "inter_mod_deps", "--recursive"): + with cli_args("--name", "inter_mod_deps", "--submodules"): main() captured = capsys.readouterr() assert captured.out @@ -140,36 +140,6 @@ def test_lib_with_lazy_imports(capsys): assert "lazy_import_errors" in parsed_out -def test_lib_with_side_effect_imports(): - """Make sure that a library with import-time side effects works when - specifying the --side_effect_modules argument and fails without it - """ - - # Expect failure without the argument - with cli_args("--name", "side_effects.mod"): - with pytest.raises(AssertionError): - main() - - # Expect success with the argument - with cli_args( - "--name", - "side_effects.mod", - "--side_effect_modules", - "side_effects.global_thing", - ): - main() - - # Ensure that the argument is passed recursively - with cli_args( - "--name", - "side_effects", - "--side_effect_modules", - "side_effects.global_thing", - "--recursive", - ): - main() - - def test_with_limited_submodules(capsys): """Make sure that when a list of submodules is given, the recursion only applies to those submodules. 
@@ -177,7 +147,6 @@ def test_with_limited_submodules(capsys): with cli_args( "--name", "sample_lib", - "--recursive", "--submodules", "sample_lib.submod1", ): @@ -238,7 +207,7 @@ def test_detect_transitive_with_stack_traces(capsys): with cli_args( "--name", "direct_dep_ambiguous", - "--recursive", + "--submodules", "--detect_transitive", "--track_import_stack", ): @@ -328,7 +297,7 @@ def test_detect_transitive_with_nested_module(capsys): with cli_args( "--name", "direct_dep_nested", - "--recursive", + "--submodules", "--detect_transitive", ): main() @@ -366,7 +335,7 @@ def test_lazy_module_trigger(capsys): """Make sure that a sub-module which holds LazyModule attrs does not incorrectly trigger their imports when run through import_tracker. """ - with cli_args("--name", "lazy_module", "--recursive"): + with cli_args("--name", "lazy_module", "--submodules"): main() captured = capsys.readouterr() assert captured.out From dd46c298f3d3866d1f01c7197e2f5c1bf3112837 Mon Sep 17 00:00:00 2001 From: Gabe Goodhart Date: Mon, 25 Jul 2022 13:55:24 -0600 Subject: [PATCH 11/26] LogicOverhaul: Move more tests over to test_import_tracker There are some _minor_ output deltas in the direct_dep_ambiguous where direct_dep_ambigous.bar now depends on alog because it is a direct dep of the parent module direct_dep_ambiguous. This is more correct and consistent and avoids one of the places where import order used to influence the result. Signed-off-by: Gabe Goodhart --- test/test_import_tracker.py | 115 ++++++++++++++++++++++++--- test/test_main.py | 152 ------------------------------------ 2 files changed, 104 insertions(+), 163 deletions(-) diff --git a/test/test_import_tracker.py b/test/test_import_tracker.py index bcc8e37..ca67909 100644 --- a/test/test_import_tracker.py +++ b/test/test_import_tracker.py @@ -2,18 +2,8 @@ Tests for the import_tracker module's public API """ -# Standard -from types import ModuleType -import json -import os -import sys -import tempfile -import warnings - -# Third Party -import pytest - # Local +from import_tracker import constants import import_tracker ## Package API ################################################################# @@ -148,3 +138,106 @@ def test_import_stack_tracking(): ] }, } + + +def test_detect_transitive_no_stack_traces(): + """Test that --detect_transitive works as expected""" + lib_mapping = import_tracker.track_module( + "direct_dep_ambiguous", + submodules=True, + detect_transitive=True, + ) + assert lib_mapping == { + "direct_dep_ambiguous": { + "alog": { + "type": constants.TYPE_DIRECT, + }, + "yaml": { + "type": constants.TYPE_TRANSITIVE, + }, + }, + "direct_dep_ambiguous.foo": { + "alog": { + "type": constants.TYPE_DIRECT, + }, + "yaml": { + "type": constants.TYPE_DIRECT, + }, + }, + "direct_dep_ambiguous.bar": { + "alog": { + "type": constants.TYPE_TRANSITIVE, + }, + }, + } + + +def test_detect_transitive_with_stack_traces(): + """Test that detect_transitive + track_import_stack works as expected""" + lib_mapping = import_tracker.track_module( + "direct_dep_ambiguous", + submodules=True, + detect_transitive=True, + track_import_stack=True, + ) + assert lib_mapping == { + "direct_dep_ambiguous": { + "alog": { + "stack": [ + [ + "direct_dep_ambiguous", + ], + [ + "direct_dep_ambiguous", + "direct_dep_ambiguous.foo", + ], + ], + "type": "direct", + }, + "yaml": { + "stack": [ + [ + "direct_dep_ambiguous", + "direct_dep_ambiguous.foo", + ], + ], + "type": "transitive", + }, + }, + "direct_dep_ambiguous.bar": { + "alog": { + "stack": [ + [ + 
"direct_dep_ambiguous", + "direct_dep_ambiguous.bar", + ], + ], + "type": "transitive", + }, + }, + "direct_dep_ambiguous.foo": { + "alog": { + "stack": [ + ["direct_dep_ambiguous.foo"], + ], + "type": "direct", + }, + "yaml": { + "stack": [ + ["direct_dep_ambiguous.foo"], + ], + "type": "direct", + }, + }, + } + + +def test_with_limited_submodules(): + """Make sure that when a list of submodules is given, the recursion only + applies to those submodules. + """ + lib_mapping = import_tracker.track_module( + "sample_lib", + submodules=["sample_lib.submod1"], + ) + assert set(lib_mapping.keys()) == {"sample_lib", "sample_lib.submod1"} diff --git a/test/test_main.py b/test/test_main.py index 13dce28..18346a6 100644 --- a/test/test_main.py +++ b/test/test_main.py @@ -140,158 +140,6 @@ def test_lib_with_lazy_imports(capsys): assert "lazy_import_errors" in parsed_out -def test_with_limited_submodules(capsys): - """Make sure that when a list of submodules is given, the recursion only - applies to those submodules. - """ - with cli_args( - "--name", - "sample_lib", - "--submodules", - "sample_lib.submod1", - ): - main() - captured = capsys.readouterr() - assert captured.out - parsed_out = json.loads(captured.out) - assert list(parsed_out.keys()) == ["sample_lib", "sample_lib.submod1"] - - -def test_error_submodules_without_recursive(): - """Make sure an error is raised when submodules given without recursive""" - with cli_args( - "--name", - "sample_lib", - "--submodules", - "sample_lib.submod1", - ): - with pytest.raises(ValueError): - main() - - -def test_detect_transitive_no_stack_traces(capsys): - """Test that --detect_transitive works as expected""" - with cli_args( - "--name", - "direct_dep_ambiguous", - "--recursive", - "--detect_transitive", - ): - main() - captured = capsys.readouterr() - assert captured.out - parsed_out = json.loads(captured.out) - assert parsed_out == { - "direct_dep_ambiguous": { - "alog": { - "type": constants.TYPE_DIRECT, - }, - "yaml": { - "type": constants.TYPE_TRANSITIVE, - }, - }, - "direct_dep_ambiguous.foo": { - "alog": { - "type": constants.TYPE_DIRECT, - }, - "yaml": { - "type": constants.TYPE_DIRECT, - }, - }, - "direct_dep_ambiguous.bar": {}, - } - - -def test_detect_transitive_with_stack_traces(capsys): - """Test that --detect_transitive works as expected""" - with cli_args( - "--name", - "direct_dep_ambiguous", - "--submodules", - "--detect_transitive", - "--track_import_stack", - ): - main() - captured = capsys.readouterr() - assert captured.out - parsed_out = json.loads(captured.out) - - test_lib_dir = os.path.realpath( - os.path.join( - os.path.dirname(__file__), - "sample_libs", - "direct_dep_ambiguous", - ) - ) - assert parsed_out == { - "direct_dep_ambiguous": { - "alog": { - "stack": [ - { - "filename": f"{test_lib_dir}/foo.py", - "lineno": 6, - "code_context": ["import alog"], - }, - { - "filename": f"{test_lib_dir}/__init__.py", - "lineno": 9, - "code_context": ["from . import bar, foo"], - }, - ], - "type": "direct", - }, - "yaml": { - "stack": [ - { - "filename": f"{test_lib_dir}/foo.py", - "lineno": 3, - "code_context": ["import yaml"], - }, - { - "filename": f"{test_lib_dir}/__init__.py", - "lineno": 9, - "code_context": ["from . 
import bar, foo"], - }, - ], - "type": "transitive", - }, - }, - "direct_dep_ambiguous.bar": {}, - "direct_dep_ambiguous.foo": { - "alog": { - "stack": [ - { - "filename": f"{test_lib_dir}/foo.py", - "lineno": 6, - "code_context": ["import alog"], - }, - { - "filename": f"{test_lib_dir}/__init__.py", - "lineno": 9, - "code_context": ["from . import bar, foo"], - }, - ], - "type": "direct", - }, - "yaml": { - "stack": [ - { - "filename": f"{test_lib_dir}/foo.py", - "lineno": 3, - "code_context": ["import yaml"], - }, - { - "filename": f"{test_lib_dir}/__init__.py", - "lineno": 9, - "code_context": ["from . import bar, foo"], - }, - ], - "type": "direct", - }, - }, - } - - def test_detect_transitive_with_nested_module(capsys): """Test that --detect_transitive works with nested modules as expected""" with cli_args( From 6b815b433ecaa47627a65f6dd9249e9807c23ee5 Mon Sep 17 00:00:00 2001 From: Gabe Goodhart Date: Mon, 25 Jul 2022 14:05:47 -0600 Subject: [PATCH 12/26] LogicOverhaul: Fix output for modules with no dependencies Signed-off-by: Gabe Goodhart --- import_tracker/import_tracker.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/import_tracker/import_tracker.py b/import_tracker/import_tracker.py index 3f8de06..5fa4e92 100644 --- a/import_tracker/import_tracker.py +++ b/import_tracker/import_tracker.py @@ -170,7 +170,7 @@ def track_module( mod: list(sorted(deps.keys())) for mod, deps in flattened_deps.items() } else: - deps_out = {} + deps_out = {mod: {} for mod in flattened_deps.keys()} if detect_transitive: for mod, deps in flattened_deps.items(): From 1e82023e47f1189378562230a56147d75e1e745e Mon Sep 17 00:00:00 2001 From: Gabe Goodhart Date: Mon, 25 Jul 2022 14:06:41 -0600 Subject: [PATCH 13/26] LogicOverhaul: Move the test for imports behind LazyModule BREAKING CHANGE: With the overhaul to use bytecode, lazy deps are fully invisible since there's no way to detect when a LazyModule triggers its import without inspecting every __getattr__ invocation in the bytecode! This is a departure from the results when instrumenting the import framework where these changes can be detected in sys.modules. Ultimately, this functionality with LazyModule should probably go away fully since it's not a particularly userful tool anymore. Signed-off-by: Gabe Goodhart --- test/sample_libs/lazy_module/mod.py | 1 + test/test_import_tracker.py | 23 +++++++++++++++++++++++ test/test_main.py | 17 ----------------- 3 files changed, 24 insertions(+), 17 deletions(-) diff --git a/test/sample_libs/lazy_module/mod.py b/test/sample_libs/lazy_module/mod.py index c647f45..0e31de0 100644 --- a/test/sample_libs/lazy_module/mod.py +++ b/test/sample_libs/lazy_module/mod.py @@ -1,4 +1,5 @@ # Local from .lazy_deps import alog +# Calling __getattr__ here triggers the lazy import at import time! log = alog.use_channel("foobar") diff --git a/test/test_import_tracker.py b/test/test_import_tracker.py index ca67909..82f8a06 100644 --- a/test/test_import_tracker.py +++ b/test/test_import_tracker.py @@ -241,3 +241,26 @@ def test_with_limited_submodules(): submodules=["sample_lib.submod1"], ) assert set(lib_mapping.keys()) == {"sample_lib", "sample_lib.submod1"} + + +def test_lazy_module_trigger(): + """Make sure that a sub-module which holds LazyModule attrs does not + incorrectly trigger their imports when run through import_tracker. 
+ + BREAKING CHANGE: With the overhaul to use bytecode, lazy deps are fully + invisible since there's no way to detect when a LazyModule triggers its + import without inspecting every __getattr__ invocation in the bytecode! + This is a departure from the results when instrumenting the import + framework where these changes can be detected in sys.modules. + Ultimately, this functionality with LazyModule should probably go away + fully since it's not a particularly userful tool anymore. + """ + lib_mapping = import_tracker.track_module( + "lazy_module", + submodules=True, + ) + assert lib_mapping == { + "lazy_module": [], + "lazy_module.lazy_deps": [], + "lazy_module.mod": [], + } diff --git a/test/test_main.py b/test/test_main.py index 18346a6..3b3b064 100644 --- a/test/test_main.py +++ b/test/test_main.py @@ -177,20 +177,3 @@ def test_detect_transitive_with_nested_module(capsys): "alog": {"type": "direct"}, }, } - - -def test_lazy_module_trigger(capsys): - """Make sure that a sub-module which holds LazyModule attrs does not - incorrectly trigger their imports when run through import_tracker. - """ - with cli_args("--name", "lazy_module", "--submodules"): - main() - captured = capsys.readouterr() - assert captured.out - parsed_out = json.loads(captured.out) - - assert parsed_out == { - "lazy_module": ["alog"], - "lazy_module.lazy_deps": [], - "lazy_module.mod": ["alog"], - } From d97df4c5fd24d56222f7727f8fd49211879f9c00 Mon Sep 17 00:00:00 2001 From: Gabe Goodhart Date: Mon, 25 Jul 2022 14:14:08 -0600 Subject: [PATCH 14/26] LogicOverhaul: Port the last failing test! This one also has some output changes caused by not defaulting to full_depth=True Signed-off-by: Gabe Goodhart --- test/test_import_tracker.py | 34 +++++++++++++++++++++++++++----- test/test_main.py | 39 ------------------------------------- 2 files changed, 29 insertions(+), 44 deletions(-) diff --git a/test/test_import_tracker.py b/test/test_import_tracker.py index 82f8a06..fb2b7ae 100644 --- a/test/test_import_tracker.py +++ b/test/test_import_tracker.py @@ -192,7 +192,7 @@ def test_detect_transitive_with_stack_traces(): "direct_dep_ambiguous.foo", ], ], - "type": "direct", + "type": constants.TYPE_DIRECT, }, "yaml": { "stack": [ @@ -201,7 +201,7 @@ def test_detect_transitive_with_stack_traces(): "direct_dep_ambiguous.foo", ], ], - "type": "transitive", + "type": constants.TYPE_TRANSITIVE, }, }, "direct_dep_ambiguous.bar": { @@ -212,7 +212,7 @@ def test_detect_transitive_with_stack_traces(): "direct_dep_ambiguous.bar", ], ], - "type": "transitive", + "type": constants.TYPE_TRANSITIVE, }, }, "direct_dep_ambiguous.foo": { @@ -220,13 +220,13 @@ def test_detect_transitive_with_stack_traces(): "stack": [ ["direct_dep_ambiguous.foo"], ], - "type": "direct", + "type": constants.TYPE_DIRECT, }, "yaml": { "stack": [ ["direct_dep_ambiguous.foo"], ], - "type": "direct", + "type": constants.TYPE_DIRECT, }, }, } @@ -243,6 +243,30 @@ def test_with_limited_submodules(): assert set(lib_mapping.keys()) == {"sample_lib", "sample_lib.submod1"} +def test_detect_transitive_with_nested_module(): + """Test that detect_transitive works with nested modules as expected""" + lib_mapping = import_tracker.track_module( + "direct_dep_nested", + submodules=True, + detect_transitive=True, + ) + assert lib_mapping == { + "direct_dep_nested": { + "alog": {"type": constants.TYPE_TRANSITIVE}, + "sample_lib": {"type": constants.TYPE_DIRECT}, + "yaml": {"type": constants.TYPE_TRANSITIVE}, + }, + "direct_dep_nested.nested": { + "sample_lib": {"type": 
constants.TYPE_DIRECT}, + "yaml": {"type": constants.TYPE_DIRECT}, + }, + "direct_dep_nested.nested2": { + "alog": {"type": constants.TYPE_DIRECT}, + "sample_lib": {"type": constants.TYPE_TRANSITIVE}, + }, + } + + def test_lazy_module_trigger(): """Make sure that a sub-module which holds LazyModule attrs does not incorrectly trigger their imports when run through import_tracker. diff --git a/test/test_main.py b/test/test_main.py index 3b3b064..c9639ed 100644 --- a/test/test_main.py +++ b/test/test_main.py @@ -138,42 +138,3 @@ def test_lib_with_lazy_imports(capsys): assert captured.out parsed_out = json.loads(captured.out) assert "lazy_import_errors" in parsed_out - - -def test_detect_transitive_with_nested_module(capsys): - """Test that --detect_transitive works with nested modules as expected""" - with cli_args( - "--name", - "direct_dep_nested", - "--submodules", - "--detect_transitive", - ): - main() - captured = capsys.readouterr() - assert captured.out - parsed_out = json.loads(captured.out) - - test_lib_dir = os.path.realpath( - os.path.join( - os.path.dirname(__file__), - "sample_libs", - "direct_dep_nested", - ) - ) - assert parsed_out == { - "direct_dep_nested": { - "alog": {"type": "transitive"}, - "conditional_deps": {"type": "transitive"}, - "sample_lib": {"type": "direct"}, - "yaml": {"type": "transitive"}, - }, - "direct_dep_nested.nested": { - "alog": {"type": "transitive"}, - "conditional_deps": {"type": "transitive"}, - "sample_lib": {"type": "direct"}, - "yaml": {"type": "transitive"}, - }, - "direct_dep_nested.nested2": { - "alog": {"type": "direct"}, - }, - } From 78bee99a23e3979455fe730febf795a303e99db3 Mon Sep 17 00:00:00 2001 From: Gabe Goodhart Date: Mon, 25 Jul 2022 14:21:43 -0600 Subject: [PATCH 15/26] LogicOverhaul: Split out _is_third_party and ignore known std packages that aren't caught other ways Only "collections" for now Signed-off-by: Gabe Goodhart --- import_tracker/import_tracker.py | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/import_tracker/import_tracker.py b/import_tracker/import_tracker.py index 5fa4e92..ee5bc31 100644 --- a/import_tracker/import_tracker.py +++ b/import_tracker/import_tracker.py @@ -235,6 +235,9 @@ def _get_dylib_dir(): # The path where global modules are found _std_lib_dir = os.path.realpath(os.path.dirname(os.__file__)) _std_dylib_dir = _get_dylib_dir() +_known_std_pkgs = [ + "collections", +] def _mod_defined_in_init_file(mod: ModuleType) -> bool: @@ -261,19 +264,25 @@ def _get_import_parent_path(mod_name: str) -> str: return parent_path -def _get_non_std_modules(mod_names: Union[Set[str], Dict[str, List[dict]]]) -> Set[str]: - """Take a snapshot of the non-standard modules currently imported""" - # Determine the names from the list that are non-standard - non_std_mods = { - mod_name - for mod_name in mod_names - if not mod_name.startswith("_") +def _is_third_party(mod_name: str) -> bool: + """Detect whether the given module is a third party (non-standard and not + import_tracker)""" + mod_pkg = mod_name.partition(".")[0] + return ( + not mod_name.startswith("_") and ( mod_name not in sys.modules or _get_import_parent_path(mod_name) not in [_std_lib_dir, _std_dylib_dir] ) - and mod_name.partition(".")[0] != THIS_PACKAGE - } + and mod_pkg != THIS_PACKAGE + and mod_pkg not in _known_std_pkgs + ) + + +def _get_non_std_modules(mod_names: Union[Set[str], Dict[str, List[dict]]]) -> Set[str]: + """Take a snapshot of the non-standard modules currently imported""" + # Determine the names 
from the list that are non-standard + non_std_mods = {mod_name for mod_name in mod_names if _is_third_party(mod_name)} # If this is a set, just return it directly if isinstance(mod_names, (set, list)): From 0911cd20458207413737bc6cfac97a8069a63b78 Mon Sep 17 00:00:00 2001 From: Gabe Goodhart Date: Mon, 25 Jul 2022 14:21:56 -0600 Subject: [PATCH 16/26] LogicOverhaul: Add a test with full_depth=True on Signed-off-by: Gabe Goodhart --- test/test_import_tracker.py | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/test/test_import_tracker.py b/test/test_import_tracker.py index fb2b7ae..bc5d4c8 100644 --- a/test/test_import_tracker.py +++ b/test/test_import_tracker.py @@ -267,6 +267,34 @@ def test_detect_transitive_with_nested_module(): } +def test_detect_transitive_with_nested_module_full_depth(): + """Test that with full_depth, nested dependencies are taken into account""" + lib_mapping = import_tracker.track_module( + "direct_dep_nested", + submodules=True, + detect_transitive=True, + full_depth=True, + ) + assert lib_mapping == { + "direct_dep_nested": { + "alog": {"type": constants.TYPE_TRANSITIVE}, + "sample_lib": {"type": constants.TYPE_DIRECT}, + "yaml": {"type": constants.TYPE_TRANSITIVE}, + "conditional_deps": {"type": constants.TYPE_TRANSITIVE}, + }, + "direct_dep_nested.nested": { + "sample_lib": {"type": constants.TYPE_DIRECT}, + "yaml": {"type": constants.TYPE_DIRECT}, + "conditional_deps": {"type": constants.TYPE_TRANSITIVE}, + }, + "direct_dep_nested.nested2": { + "alog": {"type": constants.TYPE_DIRECT}, + "sample_lib": {"type": constants.TYPE_TRANSITIVE}, + "conditional_deps": {"type": constants.TYPE_TRANSITIVE}, + }, + } + + def test_lazy_module_trigger(): """Make sure that a sub-module which holds LazyModule attrs does not incorrectly trigger their imports when run through import_tracker. From ff11876e11fefb9654cbc207a3bf1a464b1030ff Mon Sep 17 00:00:00 2001 From: Gabe Goodhart Date: Mon, 25 Jul 2022 14:25:10 -0600 Subject: [PATCH 17/26] LogicOverhaul: Ignore "if __name__ == '__main__'" for coverage since we don't run subprocesses anymore! Signed-off-by: Gabe Goodhart --- import_tracker/__main__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/import_tracker/__main__.py b/import_tracker/__main__.py index f8db879..0de6213 100644 --- a/import_tracker/__main__.py +++ b/import_tracker/__main__.py @@ -115,5 +115,5 @@ def main(): ) -if __name__ == "__main__": +if __name__ == "__main__": # pragma: no cover main() From 2145466897e2aec4f3bc5cbfb573d2718e237736 Mon Sep 17 00:00:00 2001 From: Gabe Goodhart Date: Mon, 25 Jul 2022 14:25:39 -0600 Subject: [PATCH 18/26] LogicOverhaul: Remove tracking mode toggle This isn't needed anymore since we don't need import-time isolation anymore Signed-off-by: Gabe Goodhart --- import_tracker/lazy_import_errors.py | 18 +----------------- 1 file changed, 1 insertion(+), 17 deletions(-) diff --git a/import_tracker/lazy_import_errors.py b/import_tracker/lazy_import_errors.py index 83d5f42..2e45276 100644 --- a/import_tracker/lazy_import_errors.py +++ b/import_tracker/lazy_import_errors.py @@ -57,18 +57,6 @@ def lazy_import_errors( ## Implementation Details ###################################################### -_TRACKING_MODE = False - - -def enable_tracking_mode(): - """This is used by the main function to disable all lazy import errors when - running as a standalone script to track the modules in a library. 
- - This function should NOT be exposed as a public API - """ - global _TRACKING_MODE - _TRACKING_MODE = True - def _make_extras_import_error( get_extras_modules: Callable[[], Set[str]], @@ -125,11 +113,7 @@ def __init__(self, make_error_message: Optional[Callable[[str], str]]): acts as the context manager, so the __enter__ implementation lives in the constructor. """ - if ( - not _TRACKING_MODE - and sys.meta_path - and not isinstance(sys.meta_path[-1], _LazyErrorMetaFinder) - ): + if sys.meta_path and not isinstance(sys.meta_path[-1], _LazyErrorMetaFinder): sys.meta_path.append(_LazyErrorMetaFinder(make_error_message)) @staticmethod From c551ee5ac8f2393521a17ea48148d7e97fda2dba Mon Sep 17 00:00:00 2001 From: Gabe Goodhart Date: Mon, 25 Jul 2022 15:48:09 -0600 Subject: [PATCH 19/26] LogicOverhaul: Add a sample lib and test to exercise all types of imports Signed-off-by: Gabe Goodhart --- test/sample_libs/all_import_types/__init__.py | 38 +++++++++++++++++++ .../all_import_types/sub_module1.py | 2 + .../all_import_types/sub_module2/__init__.py | 2 + .../all_import_types/sub_module3.py | 2 + test/test_import_tracker.py | 26 +++++++++++++ 5 files changed, 70 insertions(+) create mode 100644 test/sample_libs/all_import_types/__init__.py create mode 100644 test/sample_libs/all_import_types/sub_module1.py create mode 100644 test/sample_libs/all_import_types/sub_module2/__init__.py create mode 100644 test/sample_libs/all_import_types/sub_module3.py diff --git a/test/sample_libs/all_import_types/__init__.py b/test/sample_libs/all_import_types/__init__.py new file mode 100644 index 0000000..648ef2e --- /dev/null +++ b/test/sample_libs/all_import_types/__init__.py @@ -0,0 +1,38 @@ +""" +This sample lib explicitly exercises ALL different flavors of import statements +to ensure that the bytecode parsing is handling them correctly. + +NOTE: This does _not_ handle dynamic importing via importlib! +""" + +## Third Party ################################################################# + +# Third Party +# Import with a * +from inter_mod_deps import * + +# Import local module with a fully qualified name (ug) +import all_import_types.sub_module3 + +# First Party +# Import multiple attributes in the same from statement +# Import non-module attribute +from alog import AlogFormatterBase, alog, configure + +# Local +# Import sibling module defined in dir w/ __init__.py +# NOTE: This module imports with .. to import submod1 +# Import sibling module defined in file +# NOTE: This module imports with .. to import submod2 +from . import sub_module1, sub_module2 + +# Import nested submodule with a "from" clause +from sample_lib import submod2 + +# Directly import +import sample_lib + +# Directly import nested submodule +import sample_lib.submod1 + +## Local ####################################################################### diff --git a/test/sample_libs/all_import_types/sub_module1.py b/test/sample_libs/all_import_types/sub_module1.py new file mode 100644 index 0000000..6b54bc4 --- /dev/null +++ b/test/sample_libs/all_import_types/sub_module1.py @@ -0,0 +1,2 @@ +# Local +from . import sub_module2 diff --git a/test/sample_libs/all_import_types/sub_module2/__init__.py b/test/sample_libs/all_import_types/sub_module2/__init__.py new file mode 100644 index 0000000..597fa4b --- /dev/null +++ b/test/sample_libs/all_import_types/sub_module2/__init__.py @@ -0,0 +1,2 @@ +# Local +from .. 
import sub_module1 diff --git a/test/sample_libs/all_import_types/sub_module3.py b/test/sample_libs/all_import_types/sub_module3.py new file mode 100644 index 0000000..100bc63 --- /dev/null +++ b/test/sample_libs/all_import_types/sub_module3.py @@ -0,0 +1,2 @@ +class Foo: + pass diff --git a/test/test_import_tracker.py b/test/test_import_tracker.py index bc5d4c8..868f669 100644 --- a/test/test_import_tracker.py +++ b/test/test_import_tracker.py @@ -316,3 +316,29 @@ def test_lazy_module_trigger(): "lazy_module.lazy_deps": [], "lazy_module.mod": [], } + + +def test_all_import_types(): + """Make sure that all different import statement types are covered""" + assert track_module("all_import_types", submodules=True) == { + "all_import_types": [ + "alog", + "inter_mod_deps", + "sample_lib", + ], + "all_import_types.sub_module1": [ + "alog", + "inter_mod_deps", + "sample_lib", + ], + "all_import_types.sub_module2": [ + "alog", + "inter_mod_deps", + "sample_lib", + ], + "all_import_types.sub_module3": [ + "alog", + "inter_mod_deps", + "sample_lib", + ], + } From 4b65c86f39eee74daba74a72a98c7a2aa55b6309 Mon Sep 17 00:00:00 2001 From: Gabe Goodhart Date: Mon, 25 Jul 2022 15:48:20 -0600 Subject: [PATCH 20/26] LogicOverhaul: More test cov Signed-off-by: Gabe Goodhart --- import_tracker/import_tracker.py | 23 +++---------- test/test_import_tracker.py | 56 +++++++++++++++++++++++++------- 2 files changed, 49 insertions(+), 30 deletions(-) diff --git a/import_tracker/import_tracker.py b/import_tracker/import_tracker.py index ee5bc31..ea927a6 100644 --- a/import_tracker/import_tracker.py +++ b/import_tracker/import_tracker.py @@ -4,7 +4,7 @@ """ # Standard from types import ModuleType -from typing import Dict, List, Optional, Set, Union +from typing import Dict, Iterable, List, Optional, Set, Union import dis import importlib import os @@ -279,21 +279,10 @@ def _is_third_party(mod_name: str) -> bool: ) -def _get_non_std_modules(mod_names: Union[Set[str], Dict[str, List[dict]]]) -> Set[str]: +def _get_non_std_modules(mod_names: Iterable[str]) -> Set[str]: """Take a snapshot of the non-standard modules currently imported""" # Determine the names from the list that are non-standard - non_std_mods = {mod_name for mod_name in mod_names if _is_third_party(mod_name)} - - # If this is a set, just return it directly - if isinstance(mod_names, (set, list)): - return non_std_mods - - # If it's a dict, limit to the non standard names - return { - mod_name: mod_vals - for mod_name, mod_vals in mod_names.items() - if mod_name in non_std_mods - } + return {mod_name for mod_name in mod_names if _is_third_party(mod_name)} def _get_value_col(dis_line: str) -> str: @@ -346,10 +335,8 @@ def _figure_out_import( # Try with the import_from attached. 
This might be a module name or a # non-module attribute, so this might not work - if not import_name: - full_import_candidate = import_from - else: - full_import_candidate = f"{import_name}.{import_from}" + full_import_candidate = f"{import_name}.{import_from}" + log.debug3("Looking for [%s] in sys.modules", full_import_candidate) if full_import_candidate in sys.modules: return sys.modules[full_import_candidate] diff --git a/test/test_import_tracker.py b/test/test_import_tracker.py index 868f669..df9b08e 100644 --- a/test/test_import_tracker.py +++ b/test/test_import_tracker.py @@ -2,8 +2,17 @@ Tests for the import_tracker module's public API """ +# Standard +from types import ModuleType +import sys + # Local from import_tracker import constants +from import_tracker.import_tracker import ( + _get_imports, + _mod_defined_in_init_file, + track_module, +) import import_tracker ## Package API ################################################################# @@ -30,7 +39,7 @@ def test_track_module_programmatic(): """Test that calling track_module can be invoked to programmatically do tracking (vs as a CLI) """ - sample_lib_mapping = import_tracker.track_module("sample_lib") + sample_lib_mapping = track_module("sample_lib") assert sample_lib_mapping == { "sample_lib": sorted(["alog", "yaml", "conditional_deps"]) } @@ -40,7 +49,7 @@ def test_track_module_with_package(): """Test that calling track_module can be invoked with a relative sub module and parent package """ - sample_lib_mapping = import_tracker.track_module(".submod1", "sample_lib") + sample_lib_mapping = track_module(".submod1", "sample_lib") assert sample_lib_mapping == {"sample_lib.submod1": ["conditional_deps"]} @@ -50,7 +59,7 @@ def test_track_module_recursive(): NOTE: The num_jobs simply exercises that code as there's no real way to validate the parallelism """ - sample_lib_mapping = import_tracker.track_module("sample_lib", submodules=True) + sample_lib_mapping = track_module("sample_lib", submodules=True) assert sample_lib_mapping == { "sample_lib": sorted(["conditional_deps", "alog", "yaml"]), "sample_lib.submod1": ["conditional_deps"], @@ -62,7 +71,7 @@ def test_track_module_recursive(): def test_track_module_with_limited_submodules(): """Test that the submodules arg can be passed through""" - sample_lib_mapping = import_tracker.track_module( + sample_lib_mapping = track_module( "sample_lib", submodules=["sample_lib.submod1"], ) @@ -76,7 +85,7 @@ def test_sibling_import(): """Make sure that a library with a submodule that imports a sibling submodule properly tracks dependencies through the sibling """ - lib_mapping = import_tracker.track_module( + lib_mapping = track_module( "inter_mod_deps", submodules=True, ) @@ -105,7 +114,7 @@ def test_sibling_import(): def test_import_stack_tracking(): """Make sure that tracking the import stack works as expected""" - lib_mapping = import_tracker.track_module( + lib_mapping = track_module( "inter_mod_deps", submodules=True, track_import_stack=True, @@ -142,7 +151,7 @@ def test_import_stack_tracking(): def test_detect_transitive_no_stack_traces(): """Test that --detect_transitive works as expected""" - lib_mapping = import_tracker.track_module( + lib_mapping = track_module( "direct_dep_ambiguous", submodules=True, detect_transitive=True, @@ -174,7 +183,7 @@ def test_detect_transitive_no_stack_traces(): def test_detect_transitive_with_stack_traces(): """Test that detect_transitive + track_import_stack works as expected""" - lib_mapping = import_tracker.track_module( + lib_mapping = 
track_module( "direct_dep_ambiguous", submodules=True, detect_transitive=True, @@ -236,7 +245,7 @@ def test_with_limited_submodules(): """Make sure that when a list of submodules is given, the recursion only applies to those submodules. """ - lib_mapping = import_tracker.track_module( + lib_mapping = track_module( "sample_lib", submodules=["sample_lib.submod1"], ) @@ -245,7 +254,7 @@ def test_with_limited_submodules(): def test_detect_transitive_with_nested_module(): """Test that detect_transitive works with nested modules as expected""" - lib_mapping = import_tracker.track_module( + lib_mapping = track_module( "direct_dep_nested", submodules=True, detect_transitive=True, @@ -269,7 +278,7 @@ def test_detect_transitive_with_nested_module(): def test_detect_transitive_with_nested_module_full_depth(): """Test that with full_depth, nested dependencies are taken into account""" - lib_mapping = import_tracker.track_module( + lib_mapping = track_module( "direct_dep_nested", submodules=True, detect_transitive=True, @@ -307,7 +316,7 @@ def test_lazy_module_trigger(): Ultimately, this functionality with LazyModule should probably go away fully since it's not a particularly userful tool anymore. """ - lib_mapping = import_tracker.track_module( + lib_mapping = track_module( "lazy_module", submodules=True, ) @@ -342,3 +351,26 @@ def test_all_import_types(): "sample_lib", ], } + + +## Details ##################################################################### + + +def test_get_imports_no_bytecode(): + """Excercise _get_imports and _mod_defined_in_init_file on a module with no + bytecode to ensure that they doesn't explode! + """ + new_mod = ModuleType("new_mod") + assert _get_imports(new_mod) == set() + assert not _mod_defined_in_init_file(new_mod) + + +def test_missing_parent_mod(): + """This is a likely unreachable corner case, but this test exercises the + case where the expected parent module doesn't exist in sys.modules + """ + # Local + from sample_lib import nested + + del sys.modules["sample_lib"] + assert track_module("sample_lib.nested") From 179f9d941cca20aff6e1e57a855653a7f92812dc Mon Sep 17 00:00:00 2001 From: Gabe Goodhart Date: Mon, 25 Jul 2022 16:53:18 -0600 Subject: [PATCH 21/26] LogicOverhaul: More coverage cleanup Signed-off-by: Gabe Goodhart --- import_tracker/import_tracker.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/import_tracker/import_tracker.py b/import_tracker/import_tracker.py index ea927a6..7de15b8 100644 --- a/import_tracker/import_tracker.py +++ b/import_tracker/import_tracker.py @@ -3,6 +3,7 @@ through import statements """ # Standard +from ast import Import from types import ModuleType from typing import Dict, Iterable, List, Optional, Set, Union import dis @@ -356,10 +357,7 @@ def _get_imports(mod: ModuleType) -> Set[ModuleType]: try: loader = mod.__loader__ or mod.__spec__.loader mod_code = loader.get_code(mod.__name__) - except ImportError: - log.debug2("Could not get_code(%s)", mod.__name__) - return all_imports - except AttributeError: + except (AttributeError, ImportError): log.warning("Couldn't find a loader for %s!", mod.__name__) return all_imports if mod_code is None: @@ -381,10 +379,7 @@ def _get_imports(mod: ModuleType) -> Set[ModuleType]: current_dots = int(line_val) elif "IMPORT_NAME" in line: open_import = True - if line_val.isnumeric(): - current_dots = int(line_val) - else: - current_import_name = line_val + current_import_name = line_val elif "IMPORT_FROM" in line: open_import = True current_import_from = line_val 
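As a quick way to see what the overhauled tracker is working from, here is a minimal sketch -- separate from the patches themselves -- that dumps the IMPORT_NAME / IMPORT_FROM instructions for an already-imported module using only the standard dis and importlib modules. The helper name list_import_instructions is made up for illustration, and it assumes the module's loader exposes get_code(), which holds for the regular source and bytecode loaders.

    import dis
    import importlib

    def list_import_instructions(module_name):
        """Return (opname, argval) pairs for the import opcodes in a module's bytecode."""
        mod = importlib.import_module(module_name)
        # Mirror the loader lookup used in the patches; exotic loaders may lack
        # get_code(), which the patched _get_imports guards with a try/except.
        loader = getattr(mod, "__loader__", None) or mod.__spec__.loader
        code = loader.get_code(mod.__name__)
        if code is None:
            # Some loaders (e.g. for built-in modules) legitimately return None
            return []
        # IMPORT_NAME carries the (possibly dotted) module name being imported;
        # IMPORT_FROM carries each attribute pulled out by a from-import.
        return [
            (instr.opname, instr.argval)
            for instr in dis.get_instructions(code)
            if instr.opname in ("IMPORT_NAME", "IMPORT_FROM")
        ]

These raw opcode/value pairs are the same inputs that the patched _get_imports walks when it resolves dotted names and from-imports back to module objects in sys.modules.
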
From b1f2575e8393985844f553f9e6279d8b9e6815f2 Mon Sep 17 00:00:00 2001 From: Gabe Goodhart Date: Mon, 25 Jul 2022 16:58:01 -0600 Subject: [PATCH 22/26] LogicOverhaul: Remove unreachable code to close a final import This SHOULD be impossible to reach, but I don't know the bytecode well enough to know this for sure Signed-off-by: Gabe Goodhart --- import_tracker/import_tracker.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/import_tracker/import_tracker.py b/import_tracker/import_tracker.py index 7de15b8..60f85b7 100644 --- a/import_tracker/import_tracker.py +++ b/import_tracker/import_tracker.py @@ -402,14 +402,15 @@ def _get_imports(mod: ModuleType) -> Set[ModuleType]: open_import = False current_import_from = None - # If there's an open import at the end, close it - if open_import: - import_mod = _figure_out_import( - mod, current_dots, current_import_name, current_import_from - ) - if import_mod is not None: - log.debug2("Adding import module [%s]", import_mod.__name__) - all_imports.add(import_mod) + # To the best of my knowledge, all bytecode will end with something other + # than an import, even if an import is the last line in the file (e.g. + # STORE_NAME). If this somehow proves to be untrue, please file a bug! + assert not open_import, "Found an unclosed import in {}! {}/{}/{}".format( + mod.__name__, + current_dots, + current_import_name, + current_import_from, + ) return all_imports From a729a9dd58762984a26d4a89285f1d08a720c1dc Mon Sep 17 00:00:00 2001 From: Gabe Goodhart Date: Mon, 25 Jul 2022 17:04:55 -0600 Subject: [PATCH 23/26] LogicOverhaul: Add a test that repros workflows/blocks Signed-off-by: Gabe Goodhart --- test/sample_libs/deep_siblings/__init__.py | 2 ++ .../deep_siblings/blocks/__init__.py | 2 ++ .../deep_siblings/blocks/bar_type/__init__.py | 4 ++++ .../deep_siblings/blocks/bar_type/bar.py | 6 +++++ .../deep_siblings/blocks/foo_type/__init__.py | 4 ++++ .../deep_siblings/blocks/foo_type/foo.py | 8 +++++++ .../deep_siblings/workflows/__init__.py | 2 ++ .../workflows/foo_type/__init__.py | 4 ++++ .../deep_siblings/workflows/foo_type/foo.py | 7 ++++++ test/test_import_tracker.py | 22 +++++++++++++++++++ 10 files changed, 61 insertions(+) create mode 100644 test/sample_libs/deep_siblings/__init__.py create mode 100644 test/sample_libs/deep_siblings/blocks/__init__.py create mode 100644 test/sample_libs/deep_siblings/blocks/bar_type/__init__.py create mode 100644 test/sample_libs/deep_siblings/blocks/bar_type/bar.py create mode 100644 test/sample_libs/deep_siblings/blocks/foo_type/__init__.py create mode 100644 test/sample_libs/deep_siblings/blocks/foo_type/foo.py create mode 100644 test/sample_libs/deep_siblings/workflows/__init__.py create mode 100644 test/sample_libs/deep_siblings/workflows/foo_type/__init__.py create mode 100644 test/sample_libs/deep_siblings/workflows/foo_type/foo.py diff --git a/test/sample_libs/deep_siblings/__init__.py b/test/sample_libs/deep_siblings/__init__.py new file mode 100644 index 0000000..89f98db --- /dev/null +++ b/test/sample_libs/deep_siblings/__init__.py @@ -0,0 +1,2 @@ +# Local +from . import blocks, workflows diff --git a/test/sample_libs/deep_siblings/blocks/__init__.py b/test/sample_libs/deep_siblings/blocks/__init__.py new file mode 100644 index 0000000..23777f0 --- /dev/null +++ b/test/sample_libs/deep_siblings/blocks/__init__.py @@ -0,0 +1,2 @@ +# Local +from . 
import bar_type, foo_type diff --git a/test/sample_libs/deep_siblings/blocks/bar_type/__init__.py b/test/sample_libs/deep_siblings/blocks/bar_type/__init__.py new file mode 100644 index 0000000..d44f233 --- /dev/null +++ b/test/sample_libs/deep_siblings/blocks/bar_type/__init__.py @@ -0,0 +1,4 @@ +# Local +from . import bar + +Bar = bar.Bar diff --git a/test/sample_libs/deep_siblings/blocks/bar_type/bar.py b/test/sample_libs/deep_siblings/blocks/bar_type/bar.py new file mode 100644 index 0000000..0894a99 --- /dev/null +++ b/test/sample_libs/deep_siblings/blocks/bar_type/bar.py @@ -0,0 +1,6 @@ +# First Party +import alog + + +class Bar: + pass diff --git a/test/sample_libs/deep_siblings/blocks/foo_type/__init__.py b/test/sample_libs/deep_siblings/blocks/foo_type/__init__.py new file mode 100644 index 0000000..aa2addc --- /dev/null +++ b/test/sample_libs/deep_siblings/blocks/foo_type/__init__.py @@ -0,0 +1,4 @@ +# Local +from . import foo + +Foo = foo.Foo diff --git a/test/sample_libs/deep_siblings/blocks/foo_type/foo.py b/test/sample_libs/deep_siblings/blocks/foo_type/foo.py new file mode 100644 index 0000000..0f96fde --- /dev/null +++ b/test/sample_libs/deep_siblings/blocks/foo_type/foo.py @@ -0,0 +1,8 @@ +# Third Party +import yaml + +SOME_CONSTANT = 1 + + +class Foo: + pass diff --git a/test/sample_libs/deep_siblings/workflows/__init__.py b/test/sample_libs/deep_siblings/workflows/__init__.py new file mode 100644 index 0000000..ffc56f3 --- /dev/null +++ b/test/sample_libs/deep_siblings/workflows/__init__.py @@ -0,0 +1,2 @@ +# Local +from . import foo_type diff --git a/test/sample_libs/deep_siblings/workflows/foo_type/__init__.py b/test/sample_libs/deep_siblings/workflows/foo_type/__init__.py new file mode 100644 index 0000000..aa2addc --- /dev/null +++ b/test/sample_libs/deep_siblings/workflows/foo_type/__init__.py @@ -0,0 +1,4 @@ +# Local +from . import foo + +Foo = foo.Foo diff --git a/test/sample_libs/deep_siblings/workflows/foo_type/foo.py b/test/sample_libs/deep_siblings/workflows/foo_type/foo.py new file mode 100644 index 0000000..f005c91 --- /dev/null +++ b/test/sample_libs/deep_siblings/workflows/foo_type/foo.py @@ -0,0 +1,7 @@ +# Local +from ...blocks.foo_type.foo import SOME_CONSTANT +from ...blocks.foo_type.foo import Foo as FooBlock + + +class Foo: + pass diff --git a/test/test_import_tracker.py b/test/test_import_tracker.py index df9b08e..e8a58e9 100644 --- a/test/test_import_tracker.py +++ b/test/test_import_tracker.py @@ -353,6 +353,28 @@ def test_all_import_types(): } +def test_deep_siblings(): + """This test exercises the sample library that was the main reason for the + full refactor in the first place. The library is constructed such that there + are sub-directories (blocks and workflows) where individual sub-modules + within workflows may depend on a subset of the sub-modules within blocks. In + this case, we do not want the entire dependency set of blocks to be + attributed to a workflows module, but rather we want just the dependencies + of the block modules that it needs. 
+ """ + assert track_module("deep_siblings", submodules=True) == { + "deep_siblings": ["alog", "yaml"], + "deep_siblings.blocks": ["alog", "yaml"], + "deep_siblings.blocks.foo_type": ["yaml"], + "deep_siblings.blocks.foo_type.foo": ["yaml"], + "deep_siblings.blocks.bar_type": ["alog"], + "deep_siblings.blocks.bar_type.bar": ["alog"], + "deep_siblings.workflows": ["yaml"], + "deep_siblings.workflows.foo_type": ["yaml"], + "deep_siblings.workflows.foo_type.foo": ["yaml"], + } + + ## Details ##################################################################### From 31877bf641992098048029d982553288b63f1de2 Mon Sep 17 00:00:00 2001 From: Gabe Goodhart Date: Tue, 26 Jul 2022 12:48:39 -0600 Subject: [PATCH 24/26] LogicOverhaul: Review comments Signed-off-by: Gabe Goodhart --- import_tracker/import_tracker.py | 40 +++++++++++++++++++++----------- 1 file changed, 26 insertions(+), 14 deletions(-) diff --git a/import_tracker/import_tracker.py b/import_tracker/import_tracker.py index 60f85b7..dbce0d9 100644 --- a/import_tracker/import_tracker.py +++ b/import_tracker/import_tracker.py @@ -3,9 +3,8 @@ through import statements """ # Standard -from ast import Import from types import ModuleType -from typing import Dict, Iterable, List, Optional, Set, Union +from typing import Any, Dict, Iterable, List, Optional, Set, Union import dis import importlib import os @@ -25,7 +24,7 @@ def track_module( track_import_stack: bool = False, full_depth: bool = False, detect_transitive: bool = False, -) -> dict: +) -> Union[Dict[str, List[str]], Dict[str, Dict[str, Any]]]: """Track the dependencies of a single python module Args: @@ -49,12 +48,13 @@ def track_module( Detect whether each dependency is 'direct' or 'transitive' Returns: - import_mapping: dict + import_mapping: Union[Dict[str, List[str]], Dict[str, Dict[str, Any]]] The mapping from fully-qualified module name to the set of imports - needed by the given module. If tracking import stacks, each - dependency of a given module maps to a list of lists that each - outline one path through imported modules that caused the given - dependency association. + needed by the given module. If tracking import stacks or detecting + direct vs transitive dependencies, the output schema is + Dict[str, Dict[str, Any]] where the nested dicts hold "stack" and/or + "type" keys respectively. If neither feature is enabled, the schema + is Dict[str, List[str]]. 
""" # Import the target module @@ -155,7 +155,7 @@ def track_module( log.debug2("Output modules: %s", output_mods) # Add parent direct deps to the module deps map - parent_direct_deps = _add_parent_direct_deps(module_deps_map) + parent_direct_deps = _find_parent_direct_deps(module_deps_map) # Flatten each of the output mods' dependency lists flattened_deps = { @@ -164,15 +164,20 @@ def track_module( } log.debug("Raw output deps map: %s", flattened_deps) - # If detecting transitive deps, look through the stacks and mark each dep as - # transitive or direct + # If not detecting transitive or import stacks, the values are simple lists + # of dependency names if not detect_transitive and not track_import_stack: deps_out = { mod: list(sorted(deps.keys())) for mod, deps in flattened_deps.items() } + + # Otherwise, the values will be dicts with some combination of "type" and + # "stack" populated else: deps_out = {mod: {} for mod in flattened_deps.keys()} + # If detecting transitive deps, look through the stacks and mark each dep as + # transitive or direct if detect_transitive: for mod, deps in flattened_deps.items(): for dep_name, dep_stacks in deps.items(): @@ -287,6 +292,7 @@ def _get_non_std_modules(mod_names: Iterable[str]) -> Set[str]: def _get_value_col(dis_line: str) -> str: + """Parse the string value from a `dis` output line""" loc = dis_line.find("(") if loc >= 0: return dis_line[loc + 1 : -1] @@ -299,6 +305,10 @@ def _figure_out_import( import_name: Optional[str], import_from: Optional[str], ) -> ModuleType: + """This function takes the set of information about an individual import + statement parsed out of the `dis` output and attempts to find the in-memory + module object it refers to. + """ log.debug2("Figuring out import [%s/%s/%s]", dots, import_name, import_from) # If there are no dots, look for candidate absolute imports @@ -415,11 +425,13 @@ def _get_imports(mod: ModuleType) -> Set[ModuleType]: return all_imports -def _add_parent_direct_deps( +def _find_parent_direct_deps( module_deps_map: Dict[str, List[str]] ) -> Dict[str, Dict[str, List[str]]]: - """Augment the dependencies of each module in the module deps map with any - third-party dependencies that are directly imported by parent modules. + """Construct a mapping for each module (e.g. foo.bar.baz) to a mapping of + parent modules (e.g. [foo, foo.bar]) and the sets of imports that are + directly imported in those modules. This mapping is used to augment the sets + of required imports for each target module in the final flattening. 
""" parent_direct_deps = {} From 3d53ff595b1f3840c5639f9adcf98fc14141dc11 Mon Sep 17 00:00:00 2001 From: Gabe Goodhart Date: Tue, 26 Jul 2022 12:48:55 -0600 Subject: [PATCH 25/26] LogicOverhaul: Default to full_depth in parse_requirements Signed-off-by: Gabe Goodhart --- import_tracker/setup_tools.py | 6 +++ .../__init__.py | 2 + .../full_depth_direct_and_transitive/bar.py | 3 ++ .../full_depth_direct_and_transitive/foo.py | 3 ++ test/test_setup_tools.py | 43 +++++++++++++++++++ 5 files changed, 57 insertions(+) create mode 100644 test/sample_libs/full_depth_direct_and_transitive/__init__.py create mode 100644 test/sample_libs/full_depth_direct_and_transitive/bar.py create mode 100644 test/sample_libs/full_depth_direct_and_transitive/foo.py diff --git a/import_tracker/setup_tools.py b/import_tracker/setup_tools.py index d90e677..c641438 100644 --- a/import_tracker/setup_tools.py +++ b/import_tracker/setup_tools.py @@ -22,6 +22,7 @@ def parse_requirements( requirements: Union[List[str], str], library_name: str, extras_modules: Optional[List[str]] = None, + full_depth: bool = True, **kwargs, ) -> Tuple[List[str], Dict[str, List[str]]]: """This helper uses the lists of required modules and parameters for the @@ -36,6 +37,10 @@ def parse_requirements( extras_modules: Optional[List[str]] List of module names that should be used to generate extras_require sets + full_depth: bool + Passthrough to track_module. The default here is switched to True so + that modules which are both direct and transitive dependencies of + the library are correctly allocated. **kwargs: Additional keyword arguments to pass through to track_module @@ -68,6 +73,7 @@ def parse_requirements( library_name, submodules=True, detect_transitive=True, + full_depth=full_depth, **kwargs, ) log.debug4("Library Import Mapping:\n%s", library_import_mapping) diff --git a/test/sample_libs/full_depth_direct_and_transitive/__init__.py b/test/sample_libs/full_depth_direct_and_transitive/__init__.py new file mode 100644 index 0000000..86656d8 --- /dev/null +++ b/test/sample_libs/full_depth_direct_and_transitive/__init__.py @@ -0,0 +1,2 @@ +# Local +from . import bar, foo diff --git a/test/sample_libs/full_depth_direct_and_transitive/bar.py b/test/sample_libs/full_depth_direct_and_transitive/bar.py new file mode 100644 index 0000000..ab2a227 --- /dev/null +++ b/test/sample_libs/full_depth_direct_and_transitive/bar.py @@ -0,0 +1,3 @@ +# Import single_extra to get alog transitively +# Third Party +import single_extra diff --git a/test/sample_libs/full_depth_direct_and_transitive/foo.py b/test/sample_libs/full_depth_direct_and_transitive/foo.py new file mode 100644 index 0000000..7750fa9 --- /dev/null +++ b/test/sample_libs/full_depth_direct_and_transitive/foo.py @@ -0,0 +1,3 @@ +# Depend on alog directly +# First Party +import alog diff --git a/test/test_setup_tools.py b/test/test_setup_tools.py index 7d5ecc4..1081bb1 100644 --- a/test/test_setup_tools.py +++ b/test/test_setup_tools.py @@ -192,3 +192,46 @@ def test_nested_deps(): "direct_dep_nested.nested": sorted(["PyYaml"]), "direct_dep_nested.nested2": sorted(["alchemy-logging"]), } + + +def test_full_depth_direct_and_transitive(): + """Make sure that a library which holds a dependency as both a direct import + dependency and also requires it transitively through another third party + library correclty allocates the dependency to places where the intermediate + third party library is required. 
+ """ + # Run without full_depth and ensure that alog is only allocated to foo and + # is not in the base requirements + requirements, extras_require = parse_requirements( + ["single_extra", "alchemy-logging"], + "full_depth_direct_and_transitive", + [ + "full_depth_direct_and_transitive.foo", + "full_depth_direct_and_transitive.bar", + ], + full_depth=False, + ) + assert requirements == [] + assert extras_require == { + "all": sorted(["single_extra", "alchemy-logging"]), + "full_depth_direct_and_transitive.foo": ["alchemy-logging"], + "full_depth_direct_and_transitive.bar": ["single_extra"], + } + + # Run without overriding full_depth (defaults to True) and ensure that alog + # is found transitively via single_extra so it ends up in the base + # requirements + requirements, extras_require = parse_requirements( + ["single_extra", "alchemy-logging"], + "full_depth_direct_and_transitive", + [ + "full_depth_direct_and_transitive.foo", + "full_depth_direct_and_transitive.bar", + ], + ) + assert requirements == ["alchemy-logging"] + assert extras_require == { + "all": sorted(["single_extra", "alchemy-logging"]), + "full_depth_direct_and_transitive.foo": [], + "full_depth_direct_and_transitive.bar": ["single_extra"], + } From fb0c020957ff73fc0305d17aaaaff8dbac96aefa Mon Sep 17 00:00:00 2001 From: Gabe Goodhart Date: Tue, 26 Jul 2022 12:57:50 -0600 Subject: [PATCH 26/26] LogicOverhaul: Remove LazyModule and everything associated with it This tool was only put into the public API to support a mechanism for hiding imports from tracking in certain parts of a library. Ultimately, that paradigm has proven to be a bad one since those hidden imports can result in fully invisible dependencies if the primary usage modules only bring in those deps via the LazyModules. The better solution is just to carefully manage dependencies to only bring in what's needed. Signed-off-by: Gabe Goodhart --- import_tracker/__init__.py | 1 - import_tracker/lazy_module.py | 36 ------------------- test/sample_libs/lazy_module/__init__.py | 2 -- test/sample_libs/lazy_module/lazy_deps.py | 4 --- test/sample_libs/lazy_module/mod.py | 5 --- test/test_import_tracker.py | 24 ------------- test/test_lazy_module.py | 42 ----------------------- 7 files changed, 114 deletions(-) delete mode 100644 import_tracker/lazy_module.py delete mode 100644 test/sample_libs/lazy_module/__init__.py delete mode 100644 test/sample_libs/lazy_module/lazy_deps.py delete mode 100644 test/sample_libs/lazy_module/mod.py delete mode 100644 test/test_lazy_module.py diff --git a/import_tracker/__init__.py b/import_tracker/__init__.py index b954ab5..5f0dc9f 100644 --- a/import_tracker/__init__.py +++ b/import_tracker/__init__.py @@ -7,4 +7,3 @@ from . import setup_tools from .import_tracker import track_module from .lazy_import_errors import lazy_import_errors -from .lazy_module import LazyModule diff --git a/import_tracker/lazy_module.py b/import_tracker/lazy_module.py deleted file mode 100644 index 609b407..0000000 --- a/import_tracker/lazy_module.py +++ /dev/null @@ -1,36 +0,0 @@ -""" -This module implements utilities that enable tracking of third party deps -through import statements -""" - -# Standard -from types import ModuleType -from typing import Optional -import importlib - -# Local -from .log import log - - -class LazyModule(ModuleType): - """A LazyModule is a module subclass that wraps another module but imports - it lazily and then aliases __getattr__ to the lazily imported module. 
- """ - - def __init__(self, name: str, package: Optional[str] = None): - """Hang onto the import args to use lazily""" - self.__name = name - self.__package = package - self.__wrapped_module = None - - def __getattr__(self, name: str) -> any: - """When asked for an attribute, make sure the wrapped module is imported - and then delegate - """ - if self.__wrapped_module is None: - log.debug1("Triggering lazy import for %s.%s", self.__package, self.__name) - self.__wrapped_module = importlib.import_module( - self.__name, - self.__package, - ) - return getattr(self.__wrapped_module, name) diff --git a/test/sample_libs/lazy_module/__init__.py b/test/sample_libs/lazy_module/__init__.py deleted file mode 100644 index 802f15e..0000000 --- a/test/sample_libs/lazy_module/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -# Local -from . import mod diff --git a/test/sample_libs/lazy_module/lazy_deps.py b/test/sample_libs/lazy_module/lazy_deps.py deleted file mode 100644 index 81a4e49..0000000 --- a/test/sample_libs/lazy_module/lazy_deps.py +++ /dev/null @@ -1,4 +0,0 @@ -# Local -from import_tracker import LazyModule - -alog = LazyModule("alog") diff --git a/test/sample_libs/lazy_module/mod.py b/test/sample_libs/lazy_module/mod.py deleted file mode 100644 index 0e31de0..0000000 --- a/test/sample_libs/lazy_module/mod.py +++ /dev/null @@ -1,5 +0,0 @@ -# Local -from .lazy_deps import alog - -# Calling __getattr__ here triggers the lazy import at import time! -log = alog.use_channel("foobar") diff --git a/test/test_import_tracker.py b/test/test_import_tracker.py index e8a58e9..24dbe42 100644 --- a/test/test_import_tracker.py +++ b/test/test_import_tracker.py @@ -25,7 +25,6 @@ def test_import_tracker_public_api(): expected_attrs = { "setup_tools", "track_module", - "LazyModule", "lazy_import_errors", } module_attrs = set(dir(import_tracker)) @@ -304,29 +303,6 @@ def test_detect_transitive_with_nested_module_full_depth(): } -def test_lazy_module_trigger(): - """Make sure that a sub-module which holds LazyModule attrs does not - incorrectly trigger their imports when run through import_tracker. - - BREAKING CHANGE: With the overhaul to use bytecode, lazy deps are fully - invisible since there's no way to detect when a LazyModule triggers its - import without inspecting every __getattr__ invocation in the bytecode! - This is a departure from the results when instrumenting the import - framework where these changes can be detected in sys.modules. - Ultimately, this functionality with LazyModule should probably go away - fully since it's not a particularly userful tool anymore. 
- """ - lib_mapping = track_module( - "lazy_module", - submodules=True, - ) - assert lib_mapping == { - "lazy_module": [], - "lazy_module.lazy_deps": [], - "lazy_module.mod": [], - } - - def test_all_import_types(): """Make sure that all different import statement types are covered""" assert track_module("all_import_types", submodules=True) == { diff --git a/test/test_lazy_module.py b/test/test_lazy_module.py deleted file mode 100644 index af7bcf1..0000000 --- a/test/test_lazy_module.py +++ /dev/null @@ -1,42 +0,0 @@ -""" -Tests for the LazyModule -""" - -# Standard -import sys - -# Third Party -import pytest - -# Local -from import_tracker.lazy_module import LazyModule - - -def test_lazy_module_valid_import(): - """Test that using LazyModule, a module is only imported on attribute access""" - assert "alog" not in sys.modules - alog = LazyModule("alog") - assert "alog" not in sys.modules - getattr(alog, "configure") - assert "alog" in sys.modules - - -def test_lazy_module_with_package(): - """Make sure that a relative import works with a package name""" - assert "alog.alog" not in sys.modules - alog = LazyModule(".alog", "alog") - assert "alog.alog" not in sys.modules - getattr(alog, "configure") - assert "alog.alog" in sys.modules - - -def test_lazy_module_bad_import(): - """Make sure that imports of bad modules don't raise until attribute access""" - thingywidget = LazyModule("thingywidget") - with pytest.raises(ImportError): - getattr(thingywidget, "baz") - # DEBUG - # Standard - import sys - - print(sys.modules.keys())
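
For reference, the three sketches below illustrate the API surface touched by PATCH 24 and PATCH 25.
None of this code is part of the patches themselves; module names such as "sample_lib" and "my_lib"
are hypothetical placeholders.

First, the two return schemas described in the updated track_module docstring (PATCH 24). The
commented shapes follow that docstring and the deep_siblings example; they are not captured output.

# Standard
from typing import Any, Dict, List

# Local
from import_tracker import track_module

# Default behavior: Dict[str, List[str]], mapping each (sub)module of the
# hypothetical "sample_lib" to a sorted list of third party dependency names,
# e.g. {"sample_lib": ["alog", "yaml"], "sample_lib.submod": ["yaml"]}
flat: Dict[str, List[str]] = track_module("sample_lib", submodules=True)

# With detect_transitive and/or track_import_stack enabled, the values become
# nested dicts carrying "type" and/or "stack" information for each dependency,
# per the docstring above
rich: Dict[str, Dict[str, Any]] = track_module(
    "sample_lib",
    submodules=True,
    detect_transitive=True,
    track_import_stack=True,
)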
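
Next, a standalone sketch of the dis-based parsing that _get_value_col (documented in PATCH 24)
performs on disassembly lines. Only the parenthesis-parsing logic mirrors the diff; the sample
source, the IMPORT_NAME/IMPORT_FROM filtering, and the empty-string fallback are illustrative
assumptions.

# Standard
import dis
import io


def get_value_col(dis_line: str) -> str:
    """Parse the parenthesized value column from a single dis output line"""
    loc = dis_line.find("(")
    if loc >= 0:
        return dis_line[loc + 1 : -1]
    return ""  # fallback assumed; not shown in the hunk above


# Disassemble a small module-level source and pull out the import targets
source = "import os.path\nfrom json import loads\n"
buf = io.StringIO()
dis.dis(compile(source, "<example>", "exec"), file=buf)
for line in buf.getvalue().splitlines():
    if "IMPORT_NAME" in line or "IMPORT_FROM" in line:
        # e.g. "IMPORT_NAME              0 (os.path)" -> "os.path"
        print(get_value_col(line.strip()))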
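
Finally, a minimal setup.py sketch showing where the full_depth default flipped in PATCH 25 lands
in a consuming project. The library "my_lib", its extras modules, and the requirement pins are
placeholders; only the parse_requirements signature and its (requirements, extras_require) return
value come from the diff. The second half of test_full_depth_direct_and_transitive above exercises
exactly this default, with alchemy-logging promoted into the base requirements once full_depth is
left at True.

# Third Party
import setuptools

# Local
from import_tracker.setup_tools import parse_requirements

# full_depth now defaults to True, so a dependency that one module imports
# directly and another module pulls in transitively through a third party
# requirement is allocated to the shared base requirements instead of being
# confined to a single extras set. Pass full_depth=False for the old behavior.
requirements, extras_require = parse_requirements(
    requirements=["alchemy-logging", "PyYaml"],
    library_name="my_lib",
    extras_modules=["my_lib.widgets", "my_lib.workflows"],
)

setuptools.setup(
    name="my_lib",
    packages=setuptools.find_packages(include=("my_lib", "my_lib.*")),
    install_requires=requirements,
    extras_require=extras_require,
)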