[gentoo-commits] proj/pkgcore/snakeoil:master commit in: src/snakeoil/tools/
commit: 5308afe0c9b7bbceec8c1cf7d31145afb24ef371
Author: Brian Harring gmail com>
AuthorDate: Sat Jan 10 13:30:36 2026 +
Commit: Brian Harring gmail com>
CommitDate: Sat Jan 10 13:31:35 2026 +
URL:
https://gitweb.gentoo.org/proj/pkgcore/snakeoil.git/commit/?id=5308afe0
tools.imports: break out a reusable module ast walker
Signed-off-by: Brian Harring gmail.com>
src/snakeoil/tools/imports.py | 97 +--
1 file changed, 57 insertions(+), 40 deletions(-)
diff --git a/src/snakeoil/tools/imports.py b/src/snakeoil/tools/imports.py
index 4388837..1d70d7f 100644
--- a/src/snakeoil/tools/imports.py
+++ b/src/snakeoil/tools/imports.py
@@ -223,6 +223,59 @@ class AttributeCollector(ast.NodeVisitor):
mod.accessed_by[parts[0]].add(self.current)
+class ModuleCollector:
+__slots__ = ("ast_sources", "root")
+ast_sources: dict[ModuleImport, tuple[Path, ast.Module]]
+root: ModuleImport
+
+def __init__(self):
+self.root = ModuleImport(None, None, "")
+self.ast_sources = {}
+
+def add_namespace(self, namespace: str) -> list[ModuleImport]:
+collected = []
+# pre-initialize the module tree of what we care about.
+for module in get_submodules_of(namespace, include_root=True):
+obj = self.root.create(module.__name__.split("."))
+obj.alls = getattr(module, "__all__", None)
+p = Path(cast(str, module.__file__))
+with p.open("r") as f:
+self.ast_sources[obj] = (p, ast.parse(f.read(), str(p)))
+collected.append(obj)
+return collected
+
+def finalize(self):
+# collect and finalize imports, then run analysis based on attribute
access.
+
+# Note: the import collection may need to run multiple times.
Consider:
+# klass.py:
+# __all__ = ('blah', 'foon')
+# from .other import blah, foon
+#
+# If some other module tries to travers klass.py before those from
imports have been placed, the
+# other module will think it stopped at an attribute for 'blah'.
Which isn't correct.
+# They internally detect this conflict and mark a boolean to indicate
if a reprocessing is needed.
+must_be_processed = list(self.ast_sources)
+for run in range(0, 10):
+for mod in must_be_processed:
+p, tree = self.ast_sources[mod]
+ImportCollector(self.root, mod, mod.qualname, p).visit(tree)
+
+if new_reprocess := [
+mod for mod in self.ast_sources if mod.requires_reprocessing
+]:
+if len(new_reprocess) == len(must_be_processed):
+raise Exception("cycle encountered")
+must_be_processed = new_reprocess
+else:
+break
+
+for mod, (p, tree) in self.ast_sources.items():
+AttributeCollector(self.root, mod).visit(tree)
+
+
+# parser functionality goes below
+
parser = arghparse.ArgumentParser(
prog=__name__.rsplit(".", 1)[-1],
)
@@ -262,46 +315,10 @@ unused.add_argument(
@unused.bind_main_func
def main(options, out, err) -> int:
-root = ModuleImport(None, None, "")
-
-target_modules: set[ModuleImport] = set()
-ast_sources = {}
-# pre-initialize the module tree of what we care about.
-for target in tuple(options.consumers) + (options.target,):
-for module in get_submodules_of(target, include_root=True):
-obj = root.create(module.__name__.split("."))
-obj.alls = getattr(module, "__all__", None)
-p = Path(cast(str, module.__file__))
-with p.open("r") as f:
-ast_sources[obj] = (p, ast.parse(f.read(), str(p)))
-if target == options.target:
-target_modules.add(obj)
-
-# collect and finalize imports, then run analysis based on attribute
access.
-
-# Note: the import collection may need to run multiple times. Consider:
-# klass.py:
-# __all__ = ('blah', 'foon')
-# from .other import blah, foon
-#
-# If some other module tries to travers klass.py before those from imports
have been placed, the
-# other module will think it stopped at an attribute for 'blah'. Which
isn't correct.
-# They internally detect this conflict and mark a boolean to indicate if a
reprocessing is needed.
-must_be_processed = list(ast_sources)
-for run in range(0, 10):
-for mod in must_be_processed:
-p, tree = ast_sources[mod]
-ImportCollector(root, mod, mod.qualname, p).visit(tree)
-
-if new_reprocess := [mod for mod in ast_sources if
mod.requires_reprocessing]:
-if len(new_reprocess) == len(must_be_processed):
-raise Exception("cycle encountered")
-must_be_processed = new_reprocess
-else:
-break
-
-for mod, (p, tree) in a
[gentoo-commits] proj/pkgcore/snakeoil:master commit in: src/snakeoil/tools/
commit: 0413ed6e6ee5165e545bf976bad6c11efee98ce8
Author: Brian Harring gmail com>
AuthorDate: Sat Jan 10 13:48:00 2026 +
Commit: Brian Harring gmail com>
CommitDate: Sat Jan 10 17:59:01 2026 +
URL:
https://gitweb.gentoo.org/proj/pkgcore/snakeoil.git/commit/?id=0413ed6e
tools.imports used for analyzing __all__ and cross package usage
__all__ is, in theory, the 'public' api of a given module; it's not
that simple, but it *is* the signal of what is public and what isn't.
This tooling reuses the import ast machinery to examine __all__,
identifying when access exists to something not enumerated in
__all__.
This is useful for finding accidental "shiat, a downstream uses
that, I have to be careful" and for shoring up the '__all__' in
a given module so one can then privatize the internals.
Signed-off-by: Brian Harring gmail.com>
src/snakeoil/tools/imports.py | 80 +++
1 file changed, 73 insertions(+), 7 deletions(-)
diff --git a/src/snakeoil/tools/imports.py b/src/snakeoil/tools/imports.py
index 1d70d7f..48ed089 100644
--- a/src/snakeoil/tools/imports.py
+++ b/src/snakeoil/tools/imports.py
@@ -4,6 +4,7 @@ __all__ = ("main",)
import ast
+import functools
import logging
import sys
from collections import defaultdict
@@ -26,7 +27,16 @@ class CtxAccess(NamedTuple):
# This classes are effectively a tree that can be walked backwards as
# we recurse into the import pathways where they reference back down the
pathways.
# It is cyclic as all hell.
+@functools.total_ordering
class ModuleImport(dict[str, "ModuleImport"]):
+"""
+Usage notes:
+
+Be aware that .accessed_by records *all* access- whether it be an
attribute in this module,
+or if it's an access of a submodule of this given node. accessed_by isn't
a list
+of attributes, use .attrs if you want that (which filters out all known
submodules)
+"""
+
def __init__(self, root: Self | None, parent: Self | None, name: str) ->
None:
self.root = self if root is None else root # oh yeah, cyclic baby.
self.parent = self.root if parent is None else parent
@@ -43,7 +53,10 @@ class ModuleImport(dict[str, "ModuleImport"]):
return hash(self.qualname)
def __eq__(self, other):
-return self is other
+return self is other or self.qualname == other.qualname
+
+def __lt__(self, other):
+return self.qualname < other.qualname
@property
def qualname(self):
@@ -54,6 +67,10 @@ class ModuleImport(dict[str, "ModuleImport"]):
current = current.parent
return ".".join(reversed(l))
+@property
+def known_attrs(self):
+return tuple(name for name in self.accessed_by if name not in self)
+
def create(self, chunks: list[str]) -> "ModuleImport":
assert len(chunks)
name, chunks = chunks[0], chunks[1:]
@@ -88,7 +105,7 @@ class ModuleImport(dict[str, "ModuleImport"]):
return (parts, current)
def __str__(self) -> str:
-return f"{self.qualname}: access={self.accessed_by!r}
unscoped={self.unscoped_accessers!r} known
ctx={list(sorted(self.ctx_imports.keys()))!r}"
+return f"{self.qualname}: access={'{'}{',
'.join(self.accessed_by)}{'}'} unscoped={self.unscoped_accessers!r} known
ctx={list(sorted(self.ctx_imports.keys()))!r}"
def __repr__(self):
return str(self)
@@ -232,7 +249,10 @@ class ModuleCollector:
self.root = ModuleImport(None, None, "")
self.ast_sources = {}
-def add_namespace(self, namespace: str) -> list[ModuleImport]:
+def add_namespace(
+self,
+namespace: str,
+) -> list[ModuleImport]:
collected = []
# pre-initialize the module tree of what we care about.
for module in get_submodules_of(namespace, include_root=True):
@@ -315,13 +335,14 @@ unused.add_argument(
@unused.bind_main_func
def main(options, out, err) -> int:
-collecter = ModuleCollector()
-target_modules = collecter.add_namespace(options.target)
+collector = ModuleCollector()
+target_modules = sorted(collector.add_namespace(options.target))
for consumer in options.consumers:
-collecter.add_namespace(consumer)
+collector.add_namespace(consumer)
+collector.finalize()
results = []
-for mod in sorted(target_modules, key=lambda x: x.qualname):
+for mod in target_modules:
results.append(result := [mod.qualname])
if mod.alls is None:
result.append(f"{mod.qualname} has no __all__. Not analyzing")
@@ -356,5 +377,50 @@ def main(options, out, err) -> int:
return 0
+used = subparsers.add_parser(
+"used", help="analyze a namespace consumers for potential __all_
modifications"
+)
+used.add_argument(
+"target",
+action="store",
+type=str,
+help="the python module to import and scan recursively, using __all__ to
find things only used within that codebase
[gentoo-commits] proj/pkgcore/snakeoil:master commit in: src/snakeoil/tools/
commit: 1c6fb14f45ddccdeffe7175794c82098ba01bc49
Author: Brian Harring gmail com>
AuthorDate: Sat Jan 10 12:49:42 2026 +
Commit: Brian Harring gmail com>
CommitDate: Sat Jan 10 13:08:48 2026 +
URL:
https://gitweb.gentoo.org/proj/pkgcore/snakeoil.git/commit/?id=1c6fb14f
tools.{find_unused_exports -> imports}; rename and use subcommands
The import ast machinery is usable for doing further __all__ analysis,
so this will become a general tool for dealing with imports.
Also fix a double reporting bug when source modules namespace is target
modules namespace.
Signed-off-by: Brian Harring gmail.com>
.../tools/{find_unused_exports.py => imports.py} | 58 +++---
1 file changed, 28 insertions(+), 30 deletions(-)
diff --git a/src/snakeoil/tools/find_unused_exports.py
b/src/snakeoil/tools/imports.py
similarity index 90%
rename from src/snakeoil/tools/find_unused_exports.py
rename to src/snakeoil/tools/imports.py
index 5d834fa..4388837 100644
--- a/src/snakeoil/tools/find_unused_exports.py
+++ b/src/snakeoil/tools/imports.py
@@ -3,7 +3,6 @@
__all__ = ("main",)
-import argparse
import ast
import logging
import sys
@@ -12,12 +11,10 @@ from pathlib import Path
from textwrap import dedent
from typing import NamedTuple, Optional, Self, cast
+from snakeoil.cli import arghparse
+from snakeoil.cli.tool import Tool
from snakeoil.python_namespaces import get_submodules_of
-# Generally hard requirement- avoid relying on snakeoil here. At somepoint
this
-# should be able to be pointed right back at snakeoil for finding components
internally
-# that are unused.
-
logger = logging.getLogger(__name__)
@@ -150,14 +147,14 @@ class ImportCollector(ast.NodeVisitor):
# just rewrite into absolute pathing
base: list[str]
if node.level:
-base = self.current.qualname.split(".")
+base = self.current.qualname.split(".") # pyright:
ignore[reportAssignmentType]
level = node.level - self.level_adjustment
if level:
base = base[:-level]
if node.module:
base.extend(node.module.split("."))
else:
-base = node.module.split(".")
+base = node.module.split(".") # pyright:
ignore[reportOptionalMemberAccess]
for alias in node.names:
asname = self.get_asname(alias)
self.update_must_reprocess(asname)
@@ -197,16 +194,13 @@ class AttributeCollector(ast.NodeVisitor):
# terminus. This node won't have attr.
lookup.append(last)
break
-lookup.append(value.attr)
-node = node.value
+lookup.append(value.attr) # pyright:
ignore[reportAttributeAccessIssue]
+node = node.value # pyright:
ignore[reportAttributeAccessIssue]
except Exception as e:
print(
f"ast traversal bug in {self.current.qualname} for original
{type(node)}={node} sub-value {type(value)}={value}"
)
-import pdb
-
-pdb.set_trace()
raise e
lookup.reverse()
@@ -229,8 +223,15 @@ class AttributeCollector(ast.NodeVisitor):
mod.accessed_by[parts[0]].add(self.current)
-parser = argparse.ArgumentParser(
-__name__.rsplit(".", 1)[-1],
+parser = arghparse.ArgumentParser(
+prog=__name__.rsplit(".", 1)[-1],
+)
+
+subparsers = parser.add_subparsers(description="commands")
+
+unused = subparsers.add_parser(
+"unused",
+help="tooling for finding used __all__ exports",
description=dedent(
"""\
Tool for finding potentially dead code
@@ -248,35 +249,33 @@ parser = argparse.ArgumentParser(
"""
),
)
-parser.add_argument(
-"source",
+unused.add_argument(
+"target",
action="store",
type=str,
help="the python module to import and scan recursively, using __all__ to
find things only used within that codebase.",
)
-parser.add_argument(
-"targets", type=str, nargs="+", help="python namespaces to scan for usage."
-)
-parser.add_argument(
-"-v", action="store_true", default=False, dest="verbose", help="Increase
verbosity"
+unused.add_argument(
+"consumers", type=str, nargs="+", help="python namespaces to scan for
usage."
)
+@unused.bind_main_func
def main(options, out, err) -> int:
root = ModuleImport(None, None, "")
-source_modules: list[ModuleImport] = []
+target_modules: set[ModuleImport] = set()
ast_sources = {}
# pre-initialize the module tree of what we care about.
-for target in tuple(options.targets) + (options.source,):
+for target in tuple(options.consumers) + (options.target,):
for module in get_submodules_of(target, include_root=True):
obj = root.create(module.__name__.split("."))
obj.alls = getattr(module, "__all__", None)
p = Pa
[gentoo-commits] proj/pkgcore/snakeoil:master commit in: src/snakeoil/tools/
commit: ff280478d9be3b3abac919c4e553270a91c2d341
Author: Brian Harring gmail com>
AuthorDate: Sun Nov 30 18:00:56 2025 +
Commit: Brian Harring gmail com>
CommitDate: Sun Nov 30 20:01:45 2025 +
URL:
https://gitweb.gentoo.org/proj/pkgcore/snakeoil.git/commit/?id=ff280478
chore: find_unused_exports basically fully works now
This won't pick up getattr usage or importlib, nor
commutative assigns of an import variable, but
it catches the rest.
Signed-off-by: Brian Harring gmail.com>
src/snakeoil/tools/find_unused_exports.py | 275 +++---
1 file changed, 217 insertions(+), 58 deletions(-)
diff --git a/src/snakeoil/tools/find_unused_exports.py
b/src/snakeoil/tools/find_unused_exports.py
index 36addae..7a6f43d 100644
--- a/src/snakeoil/tools/find_unused_exports.py
+++ b/src/snakeoil/tools/find_unused_exports.py
@@ -7,9 +7,11 @@ import argparse
import ast
import logging
import sys
+from collections import defaultdict
+from importlib import import_module
from pathlib import Path
from textwrap import dedent
-from typing import Self, cast
+from typing import NamedTuple, Optional, Self, cast
from snakeoil.python_namespaces import get_submodules_of
@@ -20,23 +22,32 @@ from snakeoil.python_namespaces import get_submodules_of
logger = logging.getLogger(__name__)
+class CtxAccess(NamedTuple):
+attr: str
+module: "ModuleImport"
+
+
# This classes are effectively a tree that can be walked backwards as
# we recurse into the import pathways where they reference back down the
pathways.
# It is cyclic as all hell.
-class ModuleImport(ast.NodeVisitor, dict[str, "ModuleImport"]):
-__slots__ = ("root", "parent", "name", "accesses", "unscoped_access",
"ctx_imports")
-
+class ModuleImport(dict[str, "ModuleImport"]):
def __init__(self, root: Self | None, parent: Self | None, name: str) ->
None:
-if name == "pkgcore.vdb.repo_ops":
-import pdb
-
-pdb.set_trace()
self.root = self if root is None else root # oh yeah, cyclic baby.
self.parent = self.root if parent is None else parent
self.name = name
-self.accesses: set[str] = set()
-self.unscoped_access: set[str] = set()
-self.ctx_imports = dict[str, Self]()
+# this is recordings of other modules accessing us.
+self.accessed_by: dict[str, set["ModuleImport"]] = defaultdict(set)
+# This is a mapping of the local name to the target namespace
+self.ctx_imports = dict[str, CtxAccess]()
+self.unscoped_accessers: set[str] = set()
+self.requires_reprocessing = False
+self.alls = None
+
+def __hash__(self) -> int: # type: ignore
+return hash(self.qualname)
+
+def __eq__(self, other):
+return self is other
@property
def qualname(self):
@@ -47,28 +58,41 @@ class ModuleImport(ast.NodeVisitor, dict[str,
"ModuleImport"]):
current = current.parent
return ".".join(reversed(l))
-def __missing__(self, name: str) -> "ModuleImport":
-assert "." not in name
-self[name] = obj = self.__class__(self.root, parent=self, name=name)
+def create(self, chunks: list[str]) -> "ModuleImport":
+assert len(chunks)
+name, chunks = chunks[0], chunks[1:]
+obj = self.setdefault(name, self.__class__(self.root, parent=self,
name=name))
+if chunks:
+return obj.create(chunks)
return obj
-def resolve_import(self, name: str) -> "ModuleImport":
+def resolve_import(
+self,
+name: str,
+requester: Optional["ModuleImport"],
+) -> tuple[list[str], "ModuleImport"]:
parts = name.split(".")
+assert all(parts)
+current = self
-current = self if parts[0] == "" else self.root
-while parts and parts[0] == "":
-if current is self.root:
-raise Exception(
-f"in {self.qualname}, an import tried to climb past root:
{name}"
-)
-current = current.parent
+while parts:
+if requester is not None:
+current.accessed_by[parts[0]].add(requester)
+if parts[0] not in current:
+break
+current = current[parts[0]]
parts = parts[1:]
-for part in parts:
-current = current[part]
-return current
+
+try:
+assert parts or self.root is not current
+except AssertionError as _e:
+# structured this way to make debugging easier
+raise
+
+return (parts, current)
def __str__(self) -> str:
-return f"{self.qualname}: access={self.accesses!r}
unscoped={self.unscoped_access!r} known
ctx={list(sorted(self.ctx_imports.keys()))!r}"
+return f"{self.qualname}: access={self.accessed_by!r}
unscoped={self.unscoped_accessers!r} known
ctx={list(sorted(self.ctx
