commit:     e0adc1721f392c89c8262c4f864f6b1edf796edc
Author:     Matt Jolly <kangie <AT> gentoo <DOT> org>
AuthorDate: Fri Sep 27 00:46:24 2024 +0000
Commit:     Matt Jolly <kangie <AT> gentoo <DOT> org>
CommitDate: Fri Sep 27 00:52:11 2024 +0000
URL:        https://gitweb.gentoo.org/proj/chromium-tools.git/commit/?id=e0adc172

get-opera-version-mapping: major refactor

- Rework the logic to get a better result when remediating
- Also store the version mapping in a dataclass (why not).
- Use packaging.version.Version to make sorting versions trivial
- Accept positional arguments for the max and min versions.

Signed-off-by: Matt Jolly <kangie <AT> gentoo.org>

 get-opera-version-mapping.py | 118 +++++++++++++++++++++++++++++++------------
 1 file changed, 86 insertions(+), 32 deletions(-)

diff --git a/get-opera-version-mapping.py b/get-opera-version-mapping.py
index 6d6f3de..015fd21 100755
--- a/get-opera-version-mapping.py
+++ b/get-opera-version-mapping.py
@@ -1,6 +1,32 @@
 #!/usr/bin/env python
+
+# SPDX-License-Identifier: GPL-2.0-or-later
+# This script is used to extract Opera and Chromium versions from the Opera changelog (blog)
+# This is incomplete data, so we need to fill in the gaps with the Chromium version from the previous known version
+# The intent here is to have _some_ sort of datasource to identify a potentially-fixed version of Opera based on
+# the Chromium version it includes.
+# High level logic:
+# We can fetch the opera blog posts that relate to a major version of Opera as long as they don't change their URIs.
+# We iterate over H4 elements to get the Opera version (and date, though we throw that away)
+# We then iterate over child elements until we find an "Update Chromium" entry, which we can use to get the
+# Chromium version (in which case we bail early) Or we exhaust the children and give up.
+# Lather, rinse, repeat.
+
+import argparse, dataclasses
+
 import requests
 from bs4 import BeautifulSoup
+from packaging.version import Version
+
+
+@dataclasses.dataclass
+class OperaChromiumVersion:
+    opera_version: Version
+    chromium_version: Version
+
+    def __str__(self):
+        chromium_version_str = 'unknown' if self.chromium_version == Version('0.0.0.0') else str(self.chromium_version)
+        return f"Opera Version: {self.opera_version}, Chromium Version: {chromium_version_str}"
 
 
 def get_opera_chromium_versions(base_url, start_version, end_version):
@@ -15,16 +41,11 @@ def get_opera_chromium_versions(base_url, start_version, end_version):
         end_version: The ending version to extract information for (inclusive).
 
     Returns:
-        A dictionary mapping Opera version to Chromium version.
-        If no update is mentioned, the previous Chromium version is used.
-        For missing data or errors, "unknown" is used.
+        A list of OperaChromiumVersion objects containing the extracted version information.
     """
-    versions = {}
-    chromium_version = None
+    versions: list[OperaChromiumVersion] = []
 
     for version in range(start_version, end_version + 1):
-        # Fix formatting issue:
-        # OR  url = base_url.format(version)
         url = base_url.format(version)
         print(f"Processing version {version}")
 
@@ -38,8 +59,8 @@ def get_opera_chromium_versions(base_url, start_version, end_version):
 
             # Iterate through each section starting with an H4 element
             for section in content.find_all('h4'):
+                chromium_version = None
                 version_str, date_str = section.text.strip().split(' – ')
-                versions[version_str] = chromium_version
 
                 # Process all content elements (including nested ones) until the next H4
                 next_sibling = section.find_next_sibling(
@@ -63,7 +84,12 @@ def get_opera_chromium_versions(base_url, start_version, end_version):
 
                 # Handle missing Chromium version
                 if not chromium_version:
-                    chromium_version = "unknown"
+                    chromium_version = '0.0.0.0'
+
+                versions.append(OperaChromiumVersion(
+                    Version(version_str),
+                    Version(chromium_version)
+                ))
 
         except requests.exceptions.RequestException as e:
             if e.args and e.args[0] == 404:
@@ -76,41 +102,69 @@ def get_opera_chromium_versions(base_url, start_version, end_version):
             print(f"Unexpected error: {e}")
             chromium_version = None  # Reset chromium_version for next iteration
 
-    return versions
+    # We're broadly sorted by major version, but within each major version we get newer entries first
+    # Sort by Opera version to get the correct order
+    sorted_versions = sorted(versions, key=lambda x: x.opera_version)
+    return sorted_versions
 
 
 def remediate_unknown_versions(versions):
     """
-    Remediates entries with "unknown" values in the versions dictionary by
+    Remediates entries with '0.0.0.0' values in the versions dictionary by
     assuming no change from the previous known version.
 
     Args:
-        versions: A dictionary mapping Opera version to Chromium version.
+        versions: A list of OperaChromiumVersion objects containing the extracted version information.
 
     Returns:
-        The modified versions dictionary with "unknown" values replaced based on previous entries.
+        A list of OperaChromiumVersion objects with '0.0.0.0' values replaced
+        by the previous known version if available.
     """
-    previous_version = None
-    for version, chromium_version in versions.items():
-        if chromium_version == "unknown":
-            if previous_version is not None:
-                # Update with previous version
-                versions[version] = previous_version
+    previous_version: Version = Version('0.0.0.0')
+    fixed_versions: list[OperaChromiumVersion] = []
+
+    for mapping in versions:
+        if mapping.chromium_version == Version('0.0.0.0') and previous_version is not Version('0.0.0.0'):
+            # Update with previous version
+            fixed_versions.append(OperaChromiumVersion(mapping.opera_version, previous_version))
         else:
-            previous_version = chromium_version  # Update known version for future references
-    return versions
+            # This should be fine, we're always parsing from oldest to newest
+            if previous_version < mapping.chromium_version:
+                previous_version = mapping.chromium_version
+            fixed_versions.append(mapping)
+
+    return fixed_versions
+
+
+def parse_arguments():
+    """
+    Parses the command line arguments and returns the parsed values.
+
+    Returns:
+        The parsed command line arguments.
+    """
+    parser = argparse.ArgumentParser(description='Get Opera and Chromium versions.')
+    parser.add_argument('start_ver', type=int, help='starting version', default=110)
+    parser.add_argument('end_ver', type=int, help='ending version', default=115)
+    return parser.parse_args()
+
+
+def main():
+    args = parse_arguments()
+
+    # Base URL with version placeholder
+    base_url = "https://blogs.opera.com/desktop/changelog-for-{}/"
 
+    opera_chromium_versions = get_opera_chromium_versions(base_url, args.start_ver, args.end_ver)
+    fixed_versions = remediate_unknown_versions(opera_chromium_versions)
 
-# Example usage
-# Base URL with version placeholder
-base_url = "https://blogs.opera.com/desktop/changelog-for-{}/"
-opera_chromium_versions = get_opera_chromium_versions(base_url, 110, 115)
+    # Print the versions
+    if fixed_versions:
+        for mapping in fixed_versions:
+            print(mapping)
+    else:
+        print("Failed to extract any versions.")
 
-opera_chromium_versions = remediate_unknown_versions(opera_chromium_versions)
 
-if opera_chromium_versions:
-    for opera_version, chromium_version in opera_chromium_versions.items():
-        print(
-            f"Opera Version: {opera_version}, Chromium Version: {chromium_version}")
-else:
-    print("Failed to extract any versions.")
+if __name__ == "__main__":
+    main()

Reply via email to