602 lines
20 KiB
Python
602 lines
20 KiB
Python
"""Automatic discovery of Python modules and packages (for inclusion in the
|
|
distribution) and other config values.
|
|
|
|
For the purposes of this module, the following nomenclature is used:
|
|
|
|
- "src-layout": a directory representing a Python project that contains a "src"
|
|
folder. Everything under the "src" folder is meant to be included in the
|
|
distribution when packaging the project. Example::
|
|
|
|
.
|
|
├── tox.ini
|
|
├── pyproject.toml
|
|
└── src/
|
|
└── mypkg/
|
|
├── __init__.py
|
|
├── mymodule.py
|
|
└── my_data_file.txt
|
|
|
|
- "flat-layout": a Python project that does not use "src-layout" but instead
|
|
have a directory under the project root for each package::
|
|
|
|
.
|
|
├── tox.ini
|
|
├── pyproject.toml
|
|
└── mypkg/
|
|
├── __init__.py
|
|
├── mymodule.py
|
|
└── my_data_file.txt
|
|
|
|
- "single-module": a project that contains a single Python script direct under
|
|
the project root (no directory used)::
|
|
|
|
.
|
|
├── tox.ini
|
|
├── pyproject.toml
|
|
└── mymodule.py
|
|
|
|
"""
|
|
|
|
import itertools
|
|
import os
|
|
from fnmatch import fnmatchcase
|
|
from glob import glob
|
|
from pathlib import Path
|
|
from typing import (
|
|
TYPE_CHECKING,
|
|
Callable,
|
|
Dict,
|
|
Iterable,
|
|
Iterator,
|
|
List,
|
|
Mapping,
|
|
Optional,
|
|
Tuple,
|
|
Union
|
|
)
|
|
|
|
import _distutils_hack.override # noqa: F401
|
|
|
|
from distutils import log
|
|
from distutils.util import convert_path
|
|
|
|
_Path = Union[str, os.PathLike]
|
|
_Filter = Callable[[str], bool]
|
|
StrIter = Iterator[str]
|
|
|
|
chain_iter = itertools.chain.from_iterable
|
|
|
|
if TYPE_CHECKING:
|
|
from setuptools import Distribution # noqa
|
|
|
|
|
|
def _valid_name(path: _Path) -> bool:
|
|
# Ignore invalid names that cannot be imported directly
|
|
return os.path.basename(path).isidentifier()
|
|
|
|
|
|
class _Finder:
|
|
"""Base class that exposes functionality for module/package finders"""
|
|
|
|
ALWAYS_EXCLUDE: Tuple[str, ...] = ()
|
|
DEFAULT_EXCLUDE: Tuple[str, ...] = ()
|
|
|
|
@classmethod
|
|
def find(
|
|
cls,
|
|
where: _Path = '.',
|
|
exclude: Iterable[str] = (),
|
|
include: Iterable[str] = ('*',)
|
|
) -> List[str]:
|
|
"""Return a list of all Python items (packages or modules, depending on
|
|
the finder implementation) found within directory 'where'.
|
|
|
|
'where' is the root directory which will be searched.
|
|
It should be supplied as a "cross-platform" (i.e. URL-style) path;
|
|
it will be converted to the appropriate local path syntax.
|
|
|
|
'exclude' is a sequence of names to exclude; '*' can be used
|
|
as a wildcard in the names.
|
|
When finding packages, 'foo.*' will exclude all subpackages of 'foo'
|
|
(but not 'foo' itself).
|
|
|
|
'include' is a sequence of names to include.
|
|
If it's specified, only the named items will be included.
|
|
If it's not specified, all found items will be included.
|
|
'include' can contain shell style wildcard patterns just like
|
|
'exclude'.
|
|
"""
|
|
|
|
exclude = exclude or cls.DEFAULT_EXCLUDE
|
|
return list(
|
|
cls._find_iter(
|
|
convert_path(str(where)),
|
|
cls._build_filter(*cls.ALWAYS_EXCLUDE, *exclude),
|
|
cls._build_filter(*include),
|
|
)
|
|
)
|
|
|
|
@classmethod
|
|
def _find_iter(cls, where: _Path, exclude: _Filter, include: _Filter) -> StrIter:
|
|
raise NotImplementedError
|
|
|
|
@staticmethod
|
|
def _build_filter(*patterns: str) -> _Filter:
|
|
"""
|
|
Given a list of patterns, return a callable that will be true only if
|
|
the input matches at least one of the patterns.
|
|
"""
|
|
return lambda name: any(fnmatchcase(name, pat) for pat in patterns)
|
|
|
|
|
|
class PackageFinder(_Finder):
|
|
"""
|
|
Generate a list of all Python packages found within a directory
|
|
"""
|
|
|
|
ALWAYS_EXCLUDE = ("ez_setup", "*__pycache__")
|
|
|
|
@classmethod
|
|
def _find_iter(cls, where: _Path, exclude: _Filter, include: _Filter) -> StrIter:
|
|
"""
|
|
All the packages found in 'where' that pass the 'include' filter, but
|
|
not the 'exclude' filter.
|
|
"""
|
|
for root, dirs, files in os.walk(str(where), followlinks=True):
|
|
# Copy dirs to iterate over it, then empty dirs.
|
|
all_dirs = dirs[:]
|
|
dirs[:] = []
|
|
|
|
for dir in all_dirs:
|
|
full_path = os.path.join(root, dir)
|
|
rel_path = os.path.relpath(full_path, where)
|
|
package = rel_path.replace(os.path.sep, '.')
|
|
|
|
# Skip directory trees that are not valid packages
|
|
if '.' in dir or not cls._looks_like_package(full_path, package):
|
|
continue
|
|
|
|
# Should this package be included?
|
|
if include(package) and not exclude(package):
|
|
yield package
|
|
|
|
# Keep searching subdirectories, as there may be more packages
|
|
# down there, even if the parent was excluded.
|
|
dirs.append(dir)
|
|
|
|
@staticmethod
|
|
def _looks_like_package(path: _Path, _package_name: str) -> bool:
|
|
"""Does a directory look like a package?"""
|
|
return os.path.isfile(os.path.join(path, '__init__.py'))
|
|
|
|
|
|
class PEP420PackageFinder(PackageFinder):
|
|
@staticmethod
|
|
def _looks_like_package(_path: _Path, _package_name: str) -> bool:
|
|
return True
|
|
|
|
|
|
class ModuleFinder(_Finder):
|
|
"""Find isolated Python modules.
|
|
This function will **not** recurse subdirectories.
|
|
"""
|
|
|
|
@classmethod
|
|
def _find_iter(cls, where: _Path, exclude: _Filter, include: _Filter) -> StrIter:
|
|
for file in glob(os.path.join(where, "*.py")):
|
|
module, _ext = os.path.splitext(os.path.basename(file))
|
|
|
|
if not cls._looks_like_module(module):
|
|
continue
|
|
|
|
if include(module) and not exclude(module):
|
|
yield module
|
|
|
|
_looks_like_module = staticmethod(_valid_name)
|
|
|
|
|
|
# We have to be extra careful in the case of flat layout to not include files
|
|
# and directories not meant for distribution (e.g. tool-related)
|
|
|
|
|
|
class FlatLayoutPackageFinder(PEP420PackageFinder):
|
|
_EXCLUDE = (
|
|
"ci",
|
|
"bin",
|
|
"doc",
|
|
"docs",
|
|
"documentation",
|
|
"manpages",
|
|
"news",
|
|
"changelog",
|
|
"test",
|
|
"tests",
|
|
"unit_test",
|
|
"unit_tests",
|
|
"example",
|
|
"examples",
|
|
"scripts",
|
|
"tools",
|
|
"util",
|
|
"utils",
|
|
"python",
|
|
"build",
|
|
"dist",
|
|
"venv",
|
|
"env",
|
|
"requirements",
|
|
# ---- Task runners / Build tools ----
|
|
"tasks", # invoke
|
|
"fabfile", # fabric
|
|
"site_scons", # SCons
|
|
# ---- Other tools ----
|
|
"benchmark",
|
|
"benchmarks",
|
|
"exercise",
|
|
"exercises",
|
|
# ---- Hidden directories/Private packages ----
|
|
"[._]*",
|
|
)
|
|
|
|
DEFAULT_EXCLUDE = tuple(chain_iter((p, f"{p}.*") for p in _EXCLUDE))
|
|
"""Reserved package names"""
|
|
|
|
@staticmethod
|
|
def _looks_like_package(_path: _Path, package_name: str) -> bool:
|
|
names = package_name.split('.')
|
|
# Consider PEP 561
|
|
root_pkg_is_valid = names[0].isidentifier() or names[0].endswith("-stubs")
|
|
return root_pkg_is_valid and all(name.isidentifier() for name in names[1:])
|
|
|
|
|
|
class FlatLayoutModuleFinder(ModuleFinder):
|
|
DEFAULT_EXCLUDE = (
|
|
"setup",
|
|
"conftest",
|
|
"test",
|
|
"tests",
|
|
"example",
|
|
"examples",
|
|
"build",
|
|
# ---- Task runners ----
|
|
"toxfile",
|
|
"noxfile",
|
|
"pavement",
|
|
"dodo",
|
|
"tasks",
|
|
"fabfile",
|
|
# ---- Other tools ----
|
|
"[Ss][Cc]onstruct", # SCons
|
|
"conanfile", # Connan: C/C++ build tool
|
|
"manage", # Django
|
|
"benchmark",
|
|
"benchmarks",
|
|
"exercise",
|
|
"exercises",
|
|
# ---- Hidden files/Private modules ----
|
|
"[._]*",
|
|
)
|
|
"""Reserved top-level module names"""
|
|
|
|
|
|
def _find_packages_within(root_pkg: str, pkg_dir: _Path) -> List[str]:
|
|
nested = PEP420PackageFinder.find(pkg_dir)
|
|
return [root_pkg] + [".".join((root_pkg, n)) for n in nested]
|
|
|
|
|
|
class ConfigDiscovery:
|
|
"""Fill-in metadata and options that can be automatically derived
|
|
(from other metadata/options, the file system or conventions)
|
|
"""
|
|
|
|
def __init__(self, distribution: "Distribution"):
|
|
self.dist = distribution
|
|
self._called = False
|
|
self._disabled = False
|
|
self._skip_ext_modules = False
|
|
|
|
def _disable(self):
|
|
"""Internal API to disable automatic discovery"""
|
|
self._disabled = True
|
|
|
|
def _ignore_ext_modules(self):
|
|
"""Internal API to disregard ext_modules.
|
|
|
|
Normally auto-discovery would not be triggered if ``ext_modules`` are set
|
|
(this is done for backward compatibility with existing packages relying on
|
|
``setup.py`` or ``setup.cfg``). However, ``setuptools`` can call this function
|
|
to ignore given ``ext_modules`` and proceed with the auto-discovery if
|
|
``packages`` and ``py_modules`` are not given (e.g. when using pyproject.toml
|
|
metadata).
|
|
"""
|
|
self._skip_ext_modules = True
|
|
|
|
@property
|
|
def _root_dir(self) -> _Path:
|
|
# The best is to wait until `src_root` is set in dist, before using _root_dir.
|
|
return self.dist.src_root or os.curdir
|
|
|
|
@property
|
|
def _package_dir(self) -> Dict[str, str]:
|
|
if self.dist.package_dir is None:
|
|
return {}
|
|
return self.dist.package_dir
|
|
|
|
def __call__(self, force=False, name=True, ignore_ext_modules=False):
|
|
"""Automatically discover missing configuration fields
|
|
and modifies the given ``distribution`` object in-place.
|
|
|
|
Note that by default this will only have an effect the first time the
|
|
``ConfigDiscovery`` object is called.
|
|
|
|
To repeatedly invoke automatic discovery (e.g. when the project
|
|
directory changes), please use ``force=True`` (or create a new
|
|
``ConfigDiscovery`` instance).
|
|
"""
|
|
if force is False and (self._called or self._disabled):
|
|
# Avoid overhead of multiple calls
|
|
return
|
|
|
|
self._analyse_package_layout(ignore_ext_modules)
|
|
if name:
|
|
self.analyse_name() # depends on ``packages`` and ``py_modules``
|
|
|
|
self._called = True
|
|
|
|
def _explicitly_specified(self, ignore_ext_modules: bool) -> bool:
|
|
"""``True`` if the user has specified some form of package/module listing"""
|
|
ignore_ext_modules = ignore_ext_modules or self._skip_ext_modules
|
|
ext_modules = not (self.dist.ext_modules is None or ignore_ext_modules)
|
|
return (
|
|
self.dist.packages is not None
|
|
or self.dist.py_modules is not None
|
|
or ext_modules
|
|
or hasattr(self.dist, "configuration") and self.dist.configuration
|
|
# ^ Some projects use numpy.distutils.misc_util.Configuration
|
|
)
|
|
|
|
def _analyse_package_layout(self, ignore_ext_modules: bool) -> bool:
|
|
if self._explicitly_specified(ignore_ext_modules):
|
|
# For backward compatibility, just try to find modules/packages
|
|
# when nothing is given
|
|
return True
|
|
|
|
log.debug(
|
|
"No `packages` or `py_modules` configuration, performing "
|
|
"automatic discovery."
|
|
)
|
|
|
|
return (
|
|
self._analyse_explicit_layout()
|
|
or self._analyse_src_layout()
|
|
# flat-layout is the trickiest for discovery so it should be last
|
|
or self._analyse_flat_layout()
|
|
)
|
|
|
|
def _analyse_explicit_layout(self) -> bool:
|
|
"""The user can explicitly give a package layout via ``package_dir``"""
|
|
package_dir = self._package_dir.copy() # don't modify directly
|
|
package_dir.pop("", None) # This falls under the "src-layout" umbrella
|
|
root_dir = self._root_dir
|
|
|
|
if not package_dir:
|
|
return False
|
|
|
|
log.debug(f"`explicit-layout` detected -- analysing {package_dir}")
|
|
pkgs = chain_iter(
|
|
_find_packages_within(pkg, os.path.join(root_dir, parent_dir))
|
|
for pkg, parent_dir in package_dir.items()
|
|
)
|
|
self.dist.packages = list(pkgs)
|
|
log.debug(f"discovered packages -- {self.dist.packages}")
|
|
return True
|
|
|
|
def _analyse_src_layout(self) -> bool:
|
|
"""Try to find all packages or modules under the ``src`` directory
|
|
(or anything pointed by ``package_dir[""]``).
|
|
|
|
The "src-layout" is relatively safe for automatic discovery.
|
|
We assume that everything within is meant to be included in the
|
|
distribution.
|
|
|
|
If ``package_dir[""]`` is not given, but the ``src`` directory exists,
|
|
this function will set ``package_dir[""] = "src"``.
|
|
"""
|
|
package_dir = self._package_dir
|
|
src_dir = os.path.join(self._root_dir, package_dir.get("", "src"))
|
|
if not os.path.isdir(src_dir):
|
|
return False
|
|
|
|
log.debug(f"`src-layout` detected -- analysing {src_dir}")
|
|
package_dir.setdefault("", os.path.basename(src_dir))
|
|
self.dist.package_dir = package_dir # persist eventual modifications
|
|
self.dist.packages = PEP420PackageFinder.find(src_dir)
|
|
self.dist.py_modules = ModuleFinder.find(src_dir)
|
|
log.debug(f"discovered packages -- {self.dist.packages}")
|
|
log.debug(f"discovered py_modules -- {self.dist.py_modules}")
|
|
return True
|
|
|
|
def _analyse_flat_layout(self) -> bool:
|
|
"""Try to find all packages and modules under the project root.
|
|
|
|
Since the ``flat-layout`` is more dangerous in terms of accidentally including
|
|
extra files/directories, this function is more conservative and will raise an
|
|
error if multiple packages or modules are found.
|
|
|
|
This assumes that multi-package dists are uncommon and refuse to support that
|
|
use case in order to be able to prevent unintended errors.
|
|
"""
|
|
log.debug(f"`flat-layout` detected -- analysing {self._root_dir}")
|
|
return self._analyse_flat_packages() or self._analyse_flat_modules()
|
|
|
|
def _analyse_flat_packages(self) -> bool:
|
|
self.dist.packages = FlatLayoutPackageFinder.find(self._root_dir)
|
|
top_level = remove_nested_packages(remove_stubs(self.dist.packages))
|
|
log.debug(f"discovered packages -- {self.dist.packages}")
|
|
self._ensure_no_accidental_inclusion(top_level, "packages")
|
|
return bool(top_level)
|
|
|
|
def _analyse_flat_modules(self) -> bool:
|
|
self.dist.py_modules = FlatLayoutModuleFinder.find(self._root_dir)
|
|
log.debug(f"discovered py_modules -- {self.dist.py_modules}")
|
|
self._ensure_no_accidental_inclusion(self.dist.py_modules, "modules")
|
|
return bool(self.dist.py_modules)
|
|
|
|
def _ensure_no_accidental_inclusion(self, detected: List[str], kind: str):
|
|
if len(detected) > 1:
|
|
from inspect import cleandoc
|
|
|
|
from setuptools.errors import PackageDiscoveryError
|
|
|
|
msg = f"""Multiple top-level {kind} discovered in a flat-layout: {detected}.
|
|
|
|
To avoid accidental inclusion of unwanted files or directories,
|
|
setuptools will not proceed with this build.
|
|
|
|
If you are trying to create a single distribution with multiple {kind}
|
|
on purpose, you should not rely on automatic discovery.
|
|
Instead, consider the following options:
|
|
|
|
1. set up custom discovery (`find` directive with `include` or `exclude`)
|
|
2. use a `src-layout`
|
|
3. explicitly set `py_modules` or `packages` with a list of names
|
|
|
|
To find more information, look for "package discovery" on setuptools docs.
|
|
"""
|
|
raise PackageDiscoveryError(cleandoc(msg))
|
|
|
|
def analyse_name(self):
|
|
"""The packages/modules are the essential contribution of the author.
|
|
Therefore the name of the distribution can be derived from them.
|
|
"""
|
|
if self.dist.metadata.name or self.dist.name:
|
|
# get_name() is not reliable (can return "UNKNOWN")
|
|
return None
|
|
|
|
log.debug("No `name` configuration, performing automatic discovery")
|
|
|
|
name = (
|
|
self._find_name_single_package_or_module()
|
|
or self._find_name_from_packages()
|
|
)
|
|
if name:
|
|
self.dist.metadata.name = name
|
|
self.dist.name = name
|
|
|
|
def _find_name_single_package_or_module(self) -> Optional[str]:
|
|
"""Exactly one module or package"""
|
|
for field in ('packages', 'py_modules'):
|
|
items = getattr(self.dist, field, None) or []
|
|
if items and len(items) == 1:
|
|
log.debug(f"Single module/package detected, name: {items[0]}")
|
|
return items[0]
|
|
|
|
return None
|
|
|
|
def _find_name_from_packages(self) -> Optional[str]:
|
|
"""Try to find the root package that is not a PEP 420 namespace"""
|
|
if not self.dist.packages:
|
|
return None
|
|
|
|
packages = remove_stubs(sorted(self.dist.packages, key=len))
|
|
package_dir = self.dist.package_dir or {}
|
|
|
|
parent_pkg = find_parent_package(packages, package_dir, self._root_dir)
|
|
if parent_pkg:
|
|
log.debug(f"Common parent package detected, name: {parent_pkg}")
|
|
return parent_pkg
|
|
|
|
log.warn("No parent package detected, impossible to derive `name`")
|
|
return None
|
|
|
|
|
|
def remove_nested_packages(packages: List[str]) -> List[str]:
|
|
"""Remove nested packages from a list of packages.
|
|
|
|
>>> remove_nested_packages(["a", "a.b1", "a.b2", "a.b1.c1"])
|
|
['a']
|
|
>>> remove_nested_packages(["a", "b", "c.d", "c.d.e.f", "g.h", "a.a1"])
|
|
['a', 'b', 'c.d', 'g.h']
|
|
"""
|
|
pkgs = sorted(packages, key=len)
|
|
top_level = pkgs[:]
|
|
size = len(pkgs)
|
|
for i, name in enumerate(reversed(pkgs)):
|
|
if any(name.startswith(f"{other}.") for other in top_level):
|
|
top_level.pop(size - i - 1)
|
|
|
|
return top_level
|
|
|
|
|
|
def remove_stubs(packages: List[str]) -> List[str]:
|
|
"""Remove type stubs (:pep:`561`) from a list of packages.
|
|
|
|
>>> remove_stubs(["a", "a.b", "a-stubs", "a-stubs.b.c", "b", "c-stubs"])
|
|
['a', 'a.b', 'b']
|
|
"""
|
|
return [pkg for pkg in packages if not pkg.split(".")[0].endswith("-stubs")]
|
|
|
|
|
|
def find_parent_package(
|
|
packages: List[str], package_dir: Mapping[str, str], root_dir: _Path
|
|
) -> Optional[str]:
|
|
"""Find the parent package that is not a namespace."""
|
|
packages = sorted(packages, key=len)
|
|
common_ancestors = []
|
|
for i, name in enumerate(packages):
|
|
if not all(n.startswith(f"{name}.") for n in packages[i+1:]):
|
|
# Since packages are sorted by length, this condition is able
|
|
# to find a list of all common ancestors.
|
|
# When there is divergence (e.g. multiple root packages)
|
|
# the list will be empty
|
|
break
|
|
common_ancestors.append(name)
|
|
|
|
for name in common_ancestors:
|
|
pkg_path = find_package_path(name, package_dir, root_dir)
|
|
init = os.path.join(pkg_path, "__init__.py")
|
|
if os.path.isfile(init):
|
|
return name
|
|
|
|
return None
|
|
|
|
|
|
def find_package_path(
|
|
name: str, package_dir: Mapping[str, str], root_dir: _Path
|
|
) -> str:
|
|
"""Given a package name, return the path where it should be found on
|
|
disk, considering the ``package_dir`` option.
|
|
|
|
>>> path = find_package_path("my.pkg", {"": "root/is/nested"}, ".")
|
|
>>> path.replace(os.sep, "/")
|
|
'./root/is/nested/my/pkg'
|
|
|
|
>>> path = find_package_path("my.pkg", {"my": "root/is/nested"}, ".")
|
|
>>> path.replace(os.sep, "/")
|
|
'./root/is/nested/pkg'
|
|
|
|
>>> path = find_package_path("my.pkg", {"my.pkg": "root/is/nested"}, ".")
|
|
>>> path.replace(os.sep, "/")
|
|
'./root/is/nested'
|
|
|
|
>>> path = find_package_path("other.pkg", {"my.pkg": "root/is/nested"}, ".")
|
|
>>> path.replace(os.sep, "/")
|
|
'./other/pkg'
|
|
"""
|
|
parts = name.split(".")
|
|
for i in range(len(parts), 0, -1):
|
|
# Look backwards, the most specific package_dir first
|
|
partial_name = ".".join(parts[:i])
|
|
if partial_name in package_dir:
|
|
parent = package_dir[partial_name]
|
|
return os.path.join(root_dir, parent, *parts[i:])
|
|
|
|
parent = package_dir.get("") or ""
|
|
return os.path.join(root_dir, *parent.split("/"), *parts)
|
|
|
|
|
|
def construct_package_dir(packages: List[str], package_path: _Path) -> Dict[str, str]:
|
|
parent_pkgs = remove_nested_packages(packages)
|
|
prefix = Path(package_path).parts
|
|
return {pkg: "/".join([*prefix, *pkg.split(".")]) for pkg in parent_pkgs}
|