Some more cleanup.

Patch 0005 builds the file list with a set instead of a list.
Previously, the code checked whether each file was already a member of
the list before appending it. We can just let Python do the
deduplication for us. I've also sorted the result before returning it,
to make it behave like gnulib-tool.sh. In func_modules_to_filelist:

    files=`for f in $files; do echo $f; done | LC_ALL=C sort -u`

I seem not to have noticed that until now. Hopefully that should help
me clean up stray sorted() calls in other places without breaking the
test suite...

Patch 0006 turns the avoided modules in GLModuleTable into a set
instead of a list. Since we check every module for membership in the
avoided modules, it makes more sense to use a set. The avoided modules
emitted in the actioncmd and gnulib-comp.m4 are stored in GLConfig, so
doing this doesn't break anything.

Patch 0007 uses defaultdict() instead of dict() for module
dependencies. defaultdict [1] has been around for a long time and
handles the explicit initialization of missing keys for you:

>>> var = dict()
>>> var['a'].add(1)
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
KeyError: 'a'
>>> from collections import defaultdict
>>> var = defaultdict(set)
>>> var['a'].add(1)
>>> print(var)
defaultdict(<class 'set'>, {'a': {1}})

[1] https://docs.python.org/3/library/collections.html#defaultdict-objects

Collin
From 24af1bb4778f683be73726a3e0b47a022dd75196 Mon Sep 17 00:00:00 2001
From: Collin Funk <collin.fu...@gmail.com>
Date: Mon, 8 Apr 2024 01:48:41 -0700
Subject: [PATCH 5/7] gnulib-tool.py: Simplify file list construction.

* pygnulib/GLModuleSystem.py (GLModuleTable.filelist): Use a set to
construct the file list instead of looping through a list for each file.
Sort the result to match gnulib-tool.sh.
---
 ChangeLog                  | 7 +++++++
 pygnulib/GLModuleSystem.py | 7 +++----
 2 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 0f0aedd2d2..aaa4d63652 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,10 @@
+2024-04-08  Collin Funk  <collin.fu...@gmail.com>
+
+	gnulib-tool.py: Simplify file list construction.
+	* pygnulib/GLModuleSystem.py (GLModuleTable.filelist): Use a set to
+	construct the file list instead of looping through a list for each file.
+	Sort the result to match gnulib-tool.sh.
+
 2024-04-07  Collin Funk  <collin.fu...@gmail.com>
 
 	gnulib-tool.py: Fix incomplete type hint.
diff --git a/pygnulib/GLModuleSystem.py b/pygnulib/GLModuleSystem.py
index 761c43c2e0..1532610466 100644
--- a/pygnulib/GLModuleSystem.py
+++ b/pygnulib/GLModuleSystem.py
@@ -1047,7 +1047,7 @@ class GLModuleTable:
     def filelist(self, modules: list[GLModule]) -> list[str]:
         '''Determine the final file list for the given list of modules.
         The list of modules must already include dependencies.'''
-        filelist = []
+        fileset = set()
         for module in modules:
             if type(module) is not GLModule:
                 raise TypeError('each module must be a GLModule instance')
@@ -1055,9 +1055,8 @@ class GLModuleTable:
                      for module in modules ]
         for listing in listings:
             for file in listing:
-                if file not in filelist:
-                    filelist += [file]
-        return filelist
+                fileset.add(file)
+        return sorted(fileset)
 
     def filelist_separately(self, main_modules: list[GLModule],
                             tests_modules: list[GLModule]) -> tuple[list[str], list[str]]:
-- 
2.44.0

From e9154caf868123ab2558495e831ffd9e097b6c07 Mon Sep 17 00:00:00 2001
From: Collin Funk <collin.fu...@gmail.com>
Date: Mon, 8 Apr 2024 02:58:49 -0700
Subject: [PATCH 6/7] gnulib-tool.py: Use a set instead of list for avoided
 module checks.

* pygnulib/GLModuleSystem.py (GLModuleTable.__init__): Use a set instead
of a list for avoided modules. This is used only for membership checks
when computing the transitive closure of the given modules, therefore
prefer the O(1) average case over O(n).
(GLModuleTable.setAvoids): Remove sorted() call.
---
 ChangeLog                  | 9 +++++++++
 pygnulib/GLModuleSystem.py | 6 +++---
 2 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index aaa4d63652..db7b7ca3ec 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,12 @@
+2024-04-08  Collin Funk  <collin.fu...@gmail.com>
+
+	gnulib-tool.py: Use a set instead of list for avoided module checks.
+	* pygnulib/GLModuleSystem.py (GLModuleTable.__init__): Use a set instead
+	of a list for avoided modules. This is used only for membership checks
+	when computing the transitive closure of the given modules, therefore
+	prefer the O(1) average case over O(n).
+	(GLModuleTable.setAvoids): Remove sorted() call.
+
 2024-04-08  Collin Funk  <collin.fu...@gmail.com>
 
 	gnulib-tool.py: Simplify file list construction.
diff --git a/pygnulib/GLModuleSystem.py b/pygnulib/GLModuleSystem.py
index 1532610466..60e9846a66 100644
--- a/pygnulib/GLModuleSystem.py
+++ b/pygnulib/GLModuleSystem.py
@@ -750,11 +750,11 @@ class GLModuleTable:
                             % type(inc_all_direct_tests).__name__)
         self.inc_all_direct_tests = inc_all_direct_tests
         self.inc_all_indirect_tests = inc_all_indirect_tests
-        self.avoids = []  # Avoids
+        self.avoids = set()  # Avoids
         for avoid in self.config.getAvoids():
             module = self.modulesystem.find(avoid)
             if module:
-                self.avoids.append(module)
+                self.avoids.add(module)
 
     def __repr__(self) -> str:
         '''x.__repr__() <==> repr(x)'''
@@ -1080,7 +1080,7 @@ class GLModuleTable:
         for module in modules:
             if type(module) is not GLModule:
                 raise TypeError('each module must be a GLModule instance')
-        self.avoids = sorted(set(modules))
+        self.avoids = set(modules)
 
     def getBaseModules(self) -> list[GLModule]:
         '''Return list of base modules.'''
-- 
2.44.0

From 60cf5da7155a4b4eb255704e76124981928a300a Mon Sep 17 00:00:00 2001
From: Collin Funk <collin.fu...@gmail.com>
Date: Mon, 8 Apr 2024 03:41:53 -0700
Subject: [PATCH 7/7] gnulib-tool.py: Use a defaultdict to simplify code.

* pygnulib/GLModuleSystem.py (GLModuleTable.__init__): Use a defaultdict
so the initial value for a key is handled for us.
(GLModuleTable.addConditional): Remove the initial value case.
---
 ChangeLog                  | 7 +++++++
 pygnulib/GLModuleSystem.py | 8 +++-----
 2 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index db7b7ca3ec..e4b70568ab 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,10 @@
+2024-04-08  Collin Funk  <collin.fu...@gmail.com>
+
+	gnulib-tool.py: Use a defaultdict to simplify code.
+	* pygnulib/GLModuleSystem.py (GLModuleTable.__init__): Use a defaultdict
+	so the initial value for a key is handled for us.
+	(GLModuleTable.addConditional): Remove the initial value case.
+
 2024-04-08  Collin Funk  <collin.fu...@gmail.com>
 
 	gnulib-tool.py: Use a set instead of list for avoided module checks.
diff --git a/pygnulib/GLModuleSystem.py b/pygnulib/GLModuleSystem.py
index 60e9846a66..a190b053ab 100644
--- a/pygnulib/GLModuleSystem.py
+++ b/pygnulib/GLModuleSystem.py
@@ -24,6 +24,7 @@ import sys
 import codecs
 import hashlib
 import subprocess as sp
+from collections import defaultdict
 from . import constants
 from .GLError import GLError
 from .GLConfig import GLConfig
@@ -732,7 +733,7 @@ class GLModuleTable:
           returns the condition when B should be enabled as a dependency of A,
           once the m4 code for A has been executed.
         '''
-        self.dependers = dict()  # Dependencies
+        self.dependers = defaultdict(list)  # Dependencies
         self.conditionals = dict()  # Conditional modules
         self.unconditionals = dict()  # Unconditional modules
         self.base_modules = []  # Base modules
@@ -789,10 +790,7 @@ class GLModuleTable:
                             % type(condition).__name__)
         if not str(module) in self.unconditionals:
             # No unconditional dependency to the given module is known at this point.
-            if str(module) not in self.dependers:
-                self.dependers[str(module)] = []
-            if str(parent) not in self.dependers[str(module)]:
-                self.dependers[str(module)].append(str(parent))
+            self.dependers[str(module)].append(str(parent))
             key = '%s---%s' % (str(parent), str(module))
             self.conditionals[key] = condition
 
-- 
2.44.0

Reply via email to