This is an automated email from the ASF dual-hosted git repository. potiuk pushed a commit to branch backport-0e11c84-v3-1-test in repository https://gitbox.apache.org/repos/asf/airflow.git
commit 6172292604c342c6e98e01870415fe40b20c7809 Author: Jarek Potiuk <[email protected]> AuthorDate: Sat Mar 14 18:26:01 2026 +0100 [v3-1-test] Fix IDEA setup to skip .claude directory and add scripts module (#63607) - Add `.claude/` to EXCLUDED_PREFIXES so pyproject.toml discovery skips `.claude/worktrees/` directories - Add `.claude` to _CLEANUP_SKIP_DIRS so IML cleanup ignores it - Add `scripts` to STATIC_MODULES for the scripts distribution (cherry picked from commit 0e11c841f3161860439e9cc8b2baee4e6478feb9) Co-authored-by: Jarek Potiuk <[email protected]> --- dev/ide_setup/setup_idea.py | 1166 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 1166 insertions(+) diff --git a/dev/ide_setup/setup_idea.py b/dev/ide_setup/setup_idea.py new file mode 100755 index 00000000000..a19cbfb2bb6 --- /dev/null +++ b/dev/ide_setup/setup_idea.py @@ -0,0 +1,1166 @@ +#!/usr/bin/env python3 +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
# /// script
# requires-python = ">=3.10"
# dependencies = [
#   "rich>=13.6.0",
#   "packaging>=24.0",
# ]
# ///
from __future__ import annotations

import argparse
import os
import platform
import re
import signal
import subprocess
import sys
import time
import uuid
import xml.etree.ElementTree as ET
from pathlib import Path

from packaging.specifiers import SpecifierSet
from packaging.version import Version
from rich import print
from rich.prompt import Confirm

# Repository root: this script lives at dev/ide_setup/setup_idea.py, so the
# root is two directory levels above the script's own directory.
ROOT_AIRFLOW_FOLDER_PATH = Path(__file__).parents[2]
# The four files under .idea/ that this script writes (and that
# cleanup_previous_setup() removes again).
IDEA_FOLDER_PATH = ROOT_AIRFLOW_FOLDER_PATH / ".idea"
AIRFLOW_IML_FILE = IDEA_FOLDER_PATH / "airflow.iml"
MODULES_XML_FILE = IDEA_FOLDER_PATH / "modules.xml"
MISC_XML_FILE = IDEA_FOLDER_PATH / "misc.xml"
IDEA_NAME_FILE = IDEA_FOLDER_PATH / ".name"
BREEZE_PATH = ROOT_AIRFLOW_FOLDER_PATH / "dev" / "breeze"

# Modules that are always part of the project. discover_modules() starts from
# this list and appends every providers/ and shared/ distribution it finds.
STATIC_MODULES: list[str] = [
    "airflow-core",
    "airflow-ctl",
    "task-sdk",
    "devel-common",
    "dev",
    "dev/breeze",
    "docker-tests",
    "kubernetes-tests",
    "helm-tests",
    "scripts",
    "task-sdk-integration-tests",
]

# Well-known module groups for --exclude.
# A value ending in "/" is treated as a path prefix by discover_modules();
# any other value only excludes the module whose path equals it exactly.
MODULE_GROUPS: dict[str, str] = {
    "providers": "providers/",
    "shared": "shared/",
    "dev": "dev",
    "tests": "tests",
}

# Template for one <sourceFolder> entry of the single-module .iml file.
# {path} is relative to the project root; {status} is "true" for test roots.
source_root_module_pattern: str = '<sourceFolder url="file://$MODULE_DIR$/{path}" isTestSource="{status}" />'

# ---------------------------------------------------------------------------
# Exclude configuration
# ---------------------------------------------------------------------------

# Directories excluded by pattern (matched recursively against directory names in all content roots).
# Derived from .gitignore entries that can appear at any directory level.
# NOTE: "dist" is intentionally NOT here — providers/fab and providers/edge3 have legitimate
# static asset dist/ directories whitelisted in .gitignore.
# Emitted as <excludePattern> entries inside every <content> element that
# _build_content_xml() produces (root module and sub-modules alike).
EXCLUDE_PATTERNS: list[str] = [
    # Python bytecode / packaging
    "__pycache__",
    "*.egg-info",
    ".eggs",
    "build",
    "develop-eggs",
    "eggs",
    "sdist",
    "wheels",
    "downloads",
    "pip-wheel-metadata",
    # Test / coverage / lint caches
    ".mypy_cache",
    ".pytest_cache",
    ".ruff_cache",
    ".ruff-cache",
    ".hypothesis",
    ".cache",
    ".tox",
    "htmlcov",
    # Node / frontend
    "node_modules",
    ".vite",
    ".pnpm-store",
    # Generated documentation
    "_build",
    "_doctree",
    "_inventory_cache",
    "_api",
    # Virtualenvs (recursive — root .venv is also in ROOT_EXCLUDE_FOLDERS)
    "venv",
    # Infrastructure / IaC
    ".terraform",
    "target",
    # IDE / editor directories
    ".vscode",
    ".cursor",
    # Legacy / misc
    ".scrapy",
    ".ropeproject",
    ".spyderproject",
    ".webassets-cache",
    ".ipynb_checkpoints",
]

# Directories excluded by explicit path (relative to $MODULE_DIR$, i.e. the project root).
# Derived from root-anchored .gitignore entries (those starting with /).
# Emitted as <excludeFolder> entries, and only when _build_content_xml() is
# called with include_root_excludes=True (i.e. for the root module).
ROOT_EXCLUDE_FOLDERS: list[str] = [
    ".build",
    ".kube",
    ".venv",
    ".uv-cache",
    "dist",
    "files",
    "logs",
    "out",
    "tmp",
    "images",
    "hive_scratch_dir",
    "airflow-core/dist",
    "airflow-core/src/airflow/ui/coverage",
    "generated",
    "docker-context-files",
    "target-airflow",
    "dev/breeze/.venv",
    "dev/registry/output",
    "dev/registry/logos",
    "3rd-party-licenses",
    "licenses",
    "registry/src/_data/versions",
]

# ---------------------------------------------------------------------------
# Python version helpers
# ---------------------------------------------------------------------------

# All minor versions we consider. Keep the upper bound a step ahead of the
# latest CPython release so newly released interpreters are recognised.
_ALL_MINOR_VERSIONS = [f"3.{m}" for m in range(9, 16)]

# An *uncommented* ``requires-python = "<spec>"`` assignment. Anchoring at the
# start of a line keeps a commented-out or embedded occurrence from matching
# first; TOML allows both basic ("...") and literal ('...') strings.
_REQUIRES_PYTHON_RE = re.compile(
    r"""^[ \t]*requires-python[ \t]*=[ \t]*(["'])(.+?)\1""",
    re.MULTILINE,
)


def _read_requires_python(pyproject_path: Path) -> str:
    """Return the ``requires-python`` value from *pyproject_path*.

    The file is read as UTF-8 (the encoding TOML mandates) rather than with the
    platform's locale encoding. Prints an error and exits with status 1 when
    no ``requires-python`` assignment is found.
    """
    text = pyproject_path.read_text(encoding="utf-8")
    match = _REQUIRES_PYTHON_RE.search(text)
    if match is None:
        print(f"[red]Error:[/] could not find requires-python in {pyproject_path}")
        sys.exit(1)
    return match.group(2)


def get_supported_python_versions(pyproject_path: Path) -> list[str]:
    """Return the list of supported ``X.Y`` Python versions according to *pyproject_path*.

    Each candidate in ``_ALL_MINOR_VERSIONS`` is tested as ``X.Y.0`` against
    the project's ``requires-python`` specifier set.
    """
    spec = SpecifierSet(_read_requires_python(pyproject_path))
    return [v for v in _ALL_MINOR_VERSIONS if Version(f"{v}.0") in spec]


# ---------------------------------------------------------------------------
# XML helpers
# ---------------------------------------------------------------------------


def _xml_attr(value: str) -> str:
    """Escape *value* so it is safe inside a double-quoted XML attribute."""
    # Function-scope import: the module's import block does not provide saxutils.
    from xml.sax.saxutils import escape

    return escape(value, {'"': "&quot;"})


def _build_exclude_patterns_xml(indent: str = " ") -> str:
    """Build XML lines for <excludePattern> entries.

    One line is produced per entry of ``EXCLUDE_PATTERNS``, each prefixed with
    *indent*; the lines are joined with newlines (no trailing newline).
    """
    return "\n".join(
        f'{indent}<excludePattern pattern="{_xml_attr(pattern)}" />' for pattern in EXCLUDE_PATTERNS
    )


def _build_exclude_folders_xml(
    folders: list[str], indent: str = " ", url_prefix: str = "file://$MODULE_DIR$"
) -> str:
    """Build XML lines for <excludeFolder> entries.

    Each folder in *folders* is appended to *url_prefix* (separated by ``/``)
    and attribute-escaped; lines are prefixed with *indent* and joined with
    newlines. An empty *folders* list yields an empty string.
    """
    return "\n".join(
        f'{indent}<excludeFolder url="{_xml_attr(f"{url_prefix}/{folder}")}" />' for folder in folders
    )


def _build_content_xml(
    source_lines: str,
    *,
    include_root_excludes: bool,
    indent: str = " ",
    url: str = "file://$MODULE_DIR$",
) -> str:
    """Build a complete <content> element with sources, exclude folders, and exclude patterns.

    *source_lines* is pre-rendered XML and is inserted verbatim (skipped when
    empty). ``ROOT_EXCLUDE_FOLDERS`` are added only when
    *include_root_excludes* is true; ``EXCLUDE_PATTERNS`` are always added.
    """
    # Children are indented one step deeper than the <content> tag itself.
    child_indent = f"{indent} "
    parts = [f'{indent}<content url="{_xml_attr(url)}">']
    if source_lines:
        parts.append(source_lines)
    if include_root_excludes:
        parts.append(_build_exclude_folders_xml(ROOT_EXCLUDE_FOLDERS, indent=child_indent))
    parts.append(_build_exclude_patterns_xml(indent=child_indent))
    parts.append(f"{indent}</content>")
    return "\n".join(parts)


# --- Templates
--- + +_iml_common_components = """\ + <component name="PyDocumentationSettings"> + <option name="format" value="PLAIN" /> + <option name="myDocStringFormat" value="Plain" /> + </component> + <component name="TemplatesService"> + <option name="TEMPLATE_FOLDERS"> + <list> + <option value="$MODULE_DIR$/chart/templates" /> + </list> + </option> + </component> + <component name="TestRunnerService"> + <option name="PROJECT_TEST_RUNNER" value="py.test" /> + </component>""" + +single_module_modules_xml_template = """\ +<?xml version="1.0" encoding="UTF-8"?> +<project version="4"> + <component name="ProjectModuleManager"> + <modules> + <module fileurl="file://$PROJECT_DIR$/.idea/airflow.iml" filepath="$PROJECT_DIR$/.idea/airflow.iml" /> + </modules> + </component> +</project>""" + +multi_module_modules_xml_template = """\ +<?xml version="1.0" encoding="UTF-8"?> +<project version="4"> + <component name="ProjectModuleManager"> + <modules> + <module fileurl="file://$PROJECT_DIR$/.idea/airflow.iml" filepath="$PROJECT_DIR$/.idea/airflow.iml" /> + {MODULE_ENTRIES} + </modules> + </component> +</project>""" + +multi_module_entry_template = ( + '<module fileurl="file://$PROJECT_DIR$/{iml_path}" filepath="$PROJECT_DIR$/{iml_path}" />' +) + + +def _build_root_iml(sdk_name: str, source_lines: str = "") -> str: + """Build a complete root .iml file (with project-level excludes and common components).""" + content = _build_content_xml(source_lines, include_root_excludes=True, indent=" ") + return ( + '<?xml version="1.0" encoding="UTF-8"?>\n' + '<module type="PYTHON_MODULE" version="4">\n' + ' <component name="NewModuleRootManager">\n' + f"{content}\n" + f' <orderEntry type="jdk" jdkName="{sdk_name}" jdkType="Python SDK" />\n' + ' <orderEntry type="sourceFolder" forTests="false" />\n' + " </component>\n" + f"{_iml_common_components}\n" + "</module>" + ) + + +def _build_sub_module_iml(source_lines: str, *, sdk_name: str = "") -> str: + """Build a sub-module .iml file. 
+ + When *sdk_name* is provided the module gets its own explicit Python SDK; + otherwise it inherits the project SDK. + """ + content = _build_content_xml(source_lines, include_root_excludes=False, indent=" ") + if sdk_name: + jdk_entry = f' <orderEntry type="jdk" jdkName="{sdk_name}" jdkType="Python SDK" />' + else: + jdk_entry = ' <orderEntry type="inheritedJdk" />' + return ( + '<?xml version="1.0" encoding="UTF-8"?>\n' + '<module type="PYTHON_MODULE" version="4">\n' + ' <component name="NewModuleRootManager">\n' + f"{content}\n" + f"{jdk_entry}\n" + ' <orderEntry type="sourceFolder" forTests="false" />\n' + " </component>\n" + ' <component name="TestRunnerService">\n' + ' <option name="PROJECT_TEST_RUNNER" value="py.test" />\n' + " </component>\n" + "</module>" + ) + + +misc_xml_template = """\ +<?xml version="1.0" encoding="UTF-8"?> +<project version="4"> + <component name="ProjectRootManager" version="2" languageLevel="JDK_25" project-jdk-name="{SDK_NAME}" project-jdk-type="Python SDK" /> +</project>""" + +# --------------------------------------------------------------------------- +# uv sync / SDK detection +# --------------------------------------------------------------------------- + + +def run_uv_sync(project_dir: Path, label: str, *, python_version: str = ""): + """Run ``uv sync`` in *project_dir* to create / update its .venv. + + When *python_version* is given (e.g. ``"3.12"``), ``--python <version>`` + is passed to ``uv sync`` so that the venv is created with that interpreter. + """ + cmd: list[str] = ["uv", "sync"] + if python_version: + cmd += ["--python", python_version] + version_info = f" (python {python_version})" if python_version else "" + print(f"[cyan]Running uv sync in {label}{version_info} ...[/]") + env = {k: v for k, v in os.environ.items() if k != "VIRTUAL_ENV"} + result = subprocess.run(cmd, cwd=project_dir, env=env, check=False) + if result.returncode != 0: + print(f"[red]Error:[/] uv sync failed in {label}. 
Check the output above.") + sys.exit(1) + print(f"[green]uv sync completed in {label}.[/]\n") + + +def get_sdk_name(venv_dir: Path, *, label: str = "") -> str: + """Return an IntelliJ SDK name for the venv in *venv_dir*. + + Uses the ``uv (<label>)`` naming convention, matching PyCharm's + auto-detected uv interpreter names. When *label* is not given the + directory name is used (e.g. ``uv (airflow-clone)``). + """ + venv_python = venv_dir / ".venv" / "bin" / "python" + if not venv_python.exists(): + print(f"[red]Error:[/] {venv_python} not found even after uv sync.") + sys.exit(1) + if not label: + label = venv_dir.name + return f"uv ({label})" + + +# --------------------------------------------------------------------------- +# Global JetBrains SDK registration +# --------------------------------------------------------------------------- + +# Product directory prefixes recognised when scanning for JetBrains config dirs. +_JETBRAINS_PRODUCT_PREFIXES = ("IntelliJIdea", "PyCharm", "IU", "PC") + +# Prefixes that identify IntelliJ IDEA (Ultimate / Community via Toolbox "IU" code). +_INTELLIJ_PREFIXES = ("IntelliJIdea", "IU") +# Prefixes that identify PyCharm (Professional / Community via Toolbox "PC" code). +_PYCHARM_PREFIXES = ("PyCharm", "PC") + + +def _detect_installed_ides(idea_path: Path | None = None) -> tuple[bool, bool]: + """Detect which JetBrains IDEs are installed. + + Returns a ``(has_intellij, has_pycharm)`` tuple. When *idea_path* is + given, only that directory is inspected. + """ + if idea_path is not None: + # If pointing at a specific product dir, check its name. + name = idea_path.name + has_intellij = any(name.startswith(p) for p in _INTELLIJ_PREFIXES) + has_pycharm = any(name.startswith(p) for p in _PYCHARM_PREFIXES) + if has_intellij or has_pycharm: + return has_intellij, has_pycharm + # Treat as base directory — scan children. 
+ if idea_path.exists(): + for child in idea_path.iterdir(): + if any(child.name.startswith(p) for p in _INTELLIJ_PREFIXES): + has_intellij = True + if any(child.name.startswith(p) for p in _PYCHARM_PREFIXES): + has_pycharm = True + return has_intellij, has_pycharm + return False, False + + base = _find_jetbrains_config_base() + if base is None or not base.exists(): + return False, False + has_intellij = False + has_pycharm = False + for child in base.iterdir(): + if any(child.name.startswith(p) for p in _INTELLIJ_PREFIXES): + has_intellij = True + if any(child.name.startswith(p) for p in _PYCHARM_PREFIXES): + has_pycharm = True + return has_intellij, has_pycharm + + +# Process names used by JetBrains IDEs (matched case-insensitively against running processes). +_JETBRAINS_PROCESS_KEYWORDS = ("idea", "pycharm", "intellij") + + +def _find_jetbrains_pids() -> list[tuple[int, str]]: + """Return a list of ``(pid, command)`` tuples for running JetBrains IDE processes. + + Excludes the current process and its parent to avoid the script killing itself + (since ``setup_idea.py`` contains "idea" which matches the process keyword filter). + """ + own_pids = {os.getpid(), os.getppid()} + system = platform.system() + if system == "Darwin": + # On macOS, look for .app bundles in the process list. 
+ try: + result = subprocess.run( + ["ps", "-eo", "pid,comm"], + capture_output=True, + text=True, + check=True, + ) + except (FileNotFoundError, subprocess.CalledProcessError): + return [] + pids: list[tuple[int, str]] = [] + for line in result.stdout.strip().splitlines()[1:]: + parts = line.strip().split(None, 1) + if len(parts) != 2: + continue + pid_str, comm = parts + comm_lower = comm.lower() + if any(kw in comm_lower for kw in _JETBRAINS_PROCESS_KEYWORDS): + try: + pid = int(pid_str) + if pid not in own_pids: + pids.append((pid, comm)) + except ValueError: + pass + return pids + if system == "Linux": + try: + result = subprocess.run( + ["ps", "-eo", "pid,args"], + capture_output=True, + text=True, + check=True, + ) + except (FileNotFoundError, subprocess.CalledProcessError): + return [] + pids = [] + for line in result.stdout.strip().splitlines()[1:]: + parts = line.strip().split(None, 1) + if len(parts) != 2: + continue + pid_str, args = parts + args_lower = args.lower() + if any(kw in args_lower for kw in _JETBRAINS_PROCESS_KEYWORDS): + try: + pid = int(pid_str) + if pid not in own_pids: + pids.append((pid, args)) + except ValueError: + pass + return pids + return [] + + +def _kill_jetbrains_ides() -> bool: + """Attempt to gracefully terminate running JetBrains IDE processes. + + Sends SIGTERM first and waits briefly, then SIGKILL if processes remain. + Returns *True* if processes were found (whether or not they were killed). + """ + pids = _find_jetbrains_pids() + if not pids: + return False + print("[yellow]Detected running JetBrains IDE process(es):[/]") + for pid, comm in pids: + print(f" PID {pid}: {comm}") + should_kill = Confirm.ask("\nKill these processes to proceed?") + if not should_kill: + return True + for pid, _comm in pids: + try: + os.kill(pid, signal.SIGTERM) + except OSError: + pass + # Wait up to 5 seconds for graceful shutdown. 
+ for _ in range(10): + remaining = _find_jetbrains_pids() + if not remaining: + print("[green]All JetBrains IDE processes terminated.[/]\n") + return True + time.sleep(0.5) + # Force-kill any remaining processes. + remaining = _find_jetbrains_pids() + for pid, _comm in remaining: + try: + os.kill(pid, signal.SIGKILL) + except OSError: + pass + print("[green]JetBrains IDE processes force-killed.[/]\n") + return True + + +def _find_jetbrains_config_base() -> Path | None: + """Return the base JetBrains configuration directory for the current platform.""" + system = platform.system() + if system == "Darwin": + return Path.home() / "Library" / "Application Support" / "JetBrains" + if system == "Linux": + xdg = os.environ.get("XDG_CONFIG_HOME", "") + return Path(xdg) / "JetBrains" if xdg else Path.home() / ".config" / "JetBrains" + if system == "Windows": + appdata = os.environ.get("APPDATA", "") + return Path(appdata) / "JetBrains" if appdata else None + return None + + +def _find_all_jdk_table_xmls(idea_path: Path | None = None) -> list[Path]: + """Find all ``jdk.table.xml`` files across installed JetBrains IDEs. + + When *idea_path* is given, it is used as the JetBrains configuration base + directory (or a specific product directory) instead of auto-detecting. + + Returns paths sorted descending so the most recent version comes first. + """ + if idea_path is not None: + # Allow pointing directly at a product directory (e.g. IntelliJIdea2025.1) + # or the parent JetBrains directory. + direct = idea_path / "options" / "jdk.table.xml" + if direct.exists(): + return [direct] + # Treat as base directory — scan for product subdirectories. 
+ if idea_path.exists(): + candidates: list[Path] = [] + for prefix in _JETBRAINS_PRODUCT_PREFIXES: + candidates.extend(idea_path.glob(f"{prefix}*")) + result: list[Path] = [] + for config_dir in sorted(candidates, reverse=True): + jdk_table = config_dir / "options" / "jdk.table.xml" + if jdk_table.exists(): + result.append(jdk_table) + return result + return [] + + base = _find_jetbrains_config_base() + if base is None or not base.exists(): + return [] + candidates = [] + for prefix in _JETBRAINS_PRODUCT_PREFIXES: + candidates.extend(base.glob(f"{prefix}*")) + result = [] + for config_dir in sorted(candidates, reverse=True): + jdk_table = config_dir / "options" / "jdk.table.xml" + if jdk_table.exists(): + result.append(jdk_table) + return result + + +def _home_var_path(path: str) -> str: + """Replace the user's home directory prefix with ``$USER_HOME$``.""" + home = str(Path.home()) + if path.startswith(home): + return "$USER_HOME$" + path[len(home) :] + return path + + +def _get_venv_python_paths(venv_python: Path) -> tuple[str, str, str]: + """Return *(stdlib, lib_dynload, site_packages)* paths for *venv_python*.""" + result = subprocess.run( + [ + str(venv_python), + "-c", + "import sysconfig, site; " + "print(sysconfig.get_path('stdlib')); " + "print(sysconfig.get_path('platstdlib')); " + "print(site.getsitepackages()[0])", + ], + capture_output=True, + text=True, + check=True, + ) + lines = result.stdout.strip().splitlines() + stdlib = lines[0] + lib_dynload = str(Path(lines[1]) / "lib-dynload") + site_packages = lines[2] + return stdlib, lib_dynload, site_packages + + +def _build_sdk_jdk_element( + name: str, + version_string: str, + home_path: str, + stdlib_path: str, + lib_dynload_path: str, + site_packages_path: str, + working_dir: str, + sdk_uuid: str | None = None, +) -> ET.Element: + """Build an ``<jdk>`` XML element suitable for insertion into ``jdk.table.xml``.""" + if sdk_uuid is None: + sdk_uuid = str(uuid.uuid4()) + + jdk = ET.Element("jdk", 
version="2") + ET.SubElement(jdk, "name", value=name) + ET.SubElement(jdk, "type", value="Python SDK") + ET.SubElement(jdk, "version", value=version_string) + ET.SubElement(jdk, "homePath", value=_home_var_path(home_path)) + + roots = ET.SubElement(jdk, "roots") + + # annotationsPath + ann = ET.SubElement(roots, "annotationsPath") + ET.SubElement(ann, "root", type="composite") + + # classPath — stdlib, lib-dynload, site-packages + cp = ET.SubElement(roots, "classPath") + cp_composite = ET.SubElement(cp, "root", type="composite") + ET.SubElement( + cp_composite, + "root", + url=f"file://{_home_var_path(stdlib_path)}", + type="simple", + ) + ET.SubElement( + cp_composite, + "root", + url=f"file://{_home_var_path(lib_dynload_path)}", + type="simple", + ) + ET.SubElement( + cp_composite, + "root", + url=f"file://{_home_var_path(site_packages_path)}", + type="simple", + ) + + # javadocPath + jd = ET.SubElement(roots, "javadocPath") + ET.SubElement(jd, "root", type="composite") + + # sourcePath + sp = ET.SubElement(roots, "sourcePath") + ET.SubElement(sp, "root", type="composite") + + # additional — UV flavour metadata + additional = ET.SubElement( + jdk, + "additional", + SDK_UUID=sdk_uuid, + IS_UV="true", + UV_WORKING_DIR=_home_var_path(working_dir), + ) + ET.SubElement(additional, "setting", name="FLAVOR_ID", value="UvSdkFlavor") + ET.SubElement(additional, "setting", name="FLAVOR_DATA", value="{}") + + return jdk + + +def _register_sdk_in_file( + jdk_table_path: Path, + name: str, + venv_python: Path, + working_dir: Path, +) -> str: + """Register or reuse an SDK in a single ``jdk.table.xml`` file. + + Returns a status string: ``"reused"``, ``"renamed"``, or ``"created"``. + """ + home_path_var = _home_var_path(str(venv_python)) + + tree = ET.parse(jdk_table_path) + root = tree.getroot() + component = root.find(".//component[@name='ProjectJdkTable']") + if component is None: + component = ET.SubElement(root, "component", name="ProjectJdkTable") + + # 1. 
Look for an existing SDK whose homePath matches the target venv python. + # If found, reuse it (preserving IntelliJ's stubs/typeshed entries) and + # rename it to the expected name if necessary. + for jdk_elem in component.findall("jdk"): + home_elem = jdk_elem.find("homePath") + if home_elem is not None and home_elem.get("value") == home_path_var: + name_elem = jdk_elem.find("name") + current_name = name_elem.get("value") if name_elem is not None else "" + if current_name == name: + return "reused" + # Rename the existing SDK to match the expected convention. + if name_elem is not None: + name_elem.set("value", name) + ET.indent(tree, space=" ") + tree.write(str(jdk_table_path), encoding="unicode", xml_declaration=False) + return "renamed" + + # 2. No existing SDK matches — create a new bare-bones entry. + # IntelliJ will populate stubs/typeshed paths on first load. + result = subprocess.run( + [str(venv_python), "--version"], + capture_output=True, + text=True, + check=True, + ) + version_string = result.stdout.strip() + stdlib, lib_dynload, site_packages = _get_venv_python_paths(venv_python) + + new_jdk = _build_sdk_jdk_element( + name=name, + version_string=version_string, + home_path=str(venv_python), + stdlib_path=stdlib, + lib_dynload_path=lib_dynload, + site_packages_path=site_packages, + working_dir=str(working_dir), + ) + component.append(new_jdk) + + ET.indent(tree, space=" ") + tree.write(str(jdk_table_path), encoding="unicode", xml_declaration=False) + return "created" + + +def register_sdk(name: str, venv_dir: Path, working_dir: Path, *, idea_path: Path | None = None) -> bool: + """Register (or reuse) a Python SDK in all global ``jdk.table.xml`` files. + + If an SDK already exists whose ``homePath`` matches the target venv, it is + reused as-is (preserving IntelliJ's classPath entries such as python_stubs + and typeshed) and renamed to *name* if necessary. A new bare-bones entry + is only created when no matching SDK exists. 
+ + When *idea_path* is given, only ``jdk.table.xml`` files under that path + are considered instead of auto-detecting all installed JetBrains IDEs. + + Returns *True* if at least one ``jdk.table.xml`` was processed, *False* if + none could be found (no JetBrains IDE installed). + """ + jdk_table_paths = _find_all_jdk_table_xmls(idea_path=idea_path) + if not jdk_table_paths: + print( + f"[yellow]Warning:[/] Could not find jdk.table.xml — " + f"SDK [bold]{name}[/] not registered globally. " + f"You can register it manually in PyCharm settings." + ) + return False + + venv_python = venv_dir / ".venv" / "bin" / "python" + + for jdk_table_path in jdk_table_paths: + status = _register_sdk_in_file(jdk_table_path, name, venv_python, working_dir) + ide_version = jdk_table_path.parent.parent.name + if status == "reused": + print(f"[green]SDK already registered:[/] [bold]{name}[/] in {ide_version}") + elif status == "renamed": + print(f"[green]Renamed existing SDK to:[/] [bold]{name}[/] in {ide_version}") + else: + print(f"[green]Created SDK:[/] [bold]{name}[/] in {ide_version}") + return True + + +# --------------------------------------------------------------------------- +# Module discovery +# --------------------------------------------------------------------------- + +EXCLUDED_PREFIXES = ("out/", ".build/", ".claude/", "dist/", ".venv/", "generated/") + + +def discover_modules(*, exclude_modules: set[str] | None = None) -> list[str]: + """Discover all modules (static + providers + shared) and return sorted list. + + *exclude_modules* is an optional set of module paths (e.g. ``{"dev/breeze"}``) + or prefixes (e.g. ``{"providers/"}``) to skip. Prefix entries end with ``/`` + and match any module whose path starts with that prefix. 
+ """ + exclude_modules = exclude_modules or set() + + def _is_excluded(module: str) -> bool: + if module in exclude_modules: + return True + return any(module.startswith(prefix) for prefix in exclude_modules if prefix.endswith("/")) + + modules = list(STATIC_MODULES) + for pyproject_toml_file in ROOT_AIRFLOW_FOLDER_PATH.rglob("providers/**/pyproject.toml"): + relative_path = pyproject_toml_file.relative_to(ROOT_AIRFLOW_FOLDER_PATH).parent.as_posix() + if any(relative_path.startswith(prefix) for prefix in EXCLUDED_PREFIXES): + continue + modules.append(relative_path) + for pyproject_toml_file in ROOT_AIRFLOW_FOLDER_PATH.rglob("shared/*/pyproject.toml"): + relative_path = pyproject_toml_file.relative_to(ROOT_AIRFLOW_FOLDER_PATH).parent.as_posix() + if any(relative_path.startswith(prefix) for prefix in EXCLUDED_PREFIXES): + continue + modules.append(relative_path) + modules.sort() + if exclude_modules: + before = len(modules) + modules = [m for m in modules if not _is_excluded(m)] + skipped = before - len(modules) + if skipped: + print(f"[yellow]Excluded {skipped} module(s)[/]") + return modules + + +def get_module_name(module_path: str) -> str: + """Convert a module path to an IntelliJ module name (e.g. providers/amazon -> providers-amazon).""" + return module_path.replace("/", "-") + + +# --------------------------------------------------------------------------- +# Cleanup of previously generated files +# --------------------------------------------------------------------------- + +# Directories that are never scanned for leftover .iml files. +_CLEANUP_SKIP_DIRS = {".idea", ".claude", "node_modules", ".venv", ".git", ".build", "out", "dist"} + + +def _find_previous_iml_files() -> list[Path]: + """Find ``.iml`` files created by a previous run of this script. + + Scans the project tree for ``*.iml`` files, skipping directories that + are known to contain unrelated ``.iml`` files (e.g. ``node_modules``, + ``.idea``). 
+ """ + results: list[Path] = [] + for iml_file in ROOT_AIRFLOW_FOLDER_PATH.rglob("*.iml"): + rel = iml_file.relative_to(ROOT_AIRFLOW_FOLDER_PATH) + if any(part in _CLEANUP_SKIP_DIRS for part in rel.parts): + continue + results.append(iml_file) + return sorted(results) + + +def cleanup_previous_setup() -> None: + """Remove files created by a previous run of this script. + + Deletes: + * Sub-module ``.iml`` files scattered across the project tree. + * The four ``.idea/`` files managed by the script: ``airflow.iml``, + ``modules.xml``, ``misc.xml``, and ``.name``. + + Prints the number of files found and deleted. + """ + managed_idea_files = [AIRFLOW_IML_FILE, MODULES_XML_FILE, MISC_XML_FILE, IDEA_NAME_FILE] + previous_iml_files = _find_previous_iml_files() + + files_to_delete: list[Path] = [] + for f in managed_idea_files: + if f.exists(): + files_to_delete.append(f) + files_to_delete.extend(previous_iml_files) + + if not files_to_delete: + print("[green]No files from a previous setup found.[/]\n") + return + + print(f"[yellow]Found {len(files_to_delete)} file(s) from a previous setup — deleting:[/]") + idea_count = sum(1 for f in files_to_delete if f.parent == IDEA_FOLDER_PATH) + iml_count = len(files_to_delete) - idea_count + if idea_count: + print(f" [dim]·[/] {idea_count} managed file(s) in .idea/") + if iml_count: + print(f" [dim]·[/] {iml_count} sub-module .iml file(s)") + for f in files_to_delete: + f.unlink() + print(f"[green]Deleted {len(files_to_delete)} file(s).[/]\n") + + +# --------------------------------------------------------------------------- +# Setup modes +# --------------------------------------------------------------------------- + + +def setup_idea_single_module(sdk_name: str, project_name: str, modules: list[str]): + """Set up a single IntelliJ module with all source roots (original behaviour).""" + all_module_paths: list[str] = [] + + for module in modules: + print(f"[green]Adding[/] source root: [blue]{module}[/]") + if 
(ROOT_AIRFLOW_FOLDER_PATH / module / "src").exists(): + all_module_paths.append(source_root_module_pattern.format(path=f"{module}/src", status="false")) + if (ROOT_AIRFLOW_FOLDER_PATH / module / "tests").exists(): + all_module_paths.append(source_root_module_pattern.format(path=f"{module}/tests", status="true")) + if module == "dev": + all_module_paths.append(source_root_module_pattern.format(path=module, status="false")) + + source_lines = "\n".join(f" {line}" for line in all_module_paths) + iml_content = _build_root_iml(sdk_name, source_lines=source_lines) + + IDEA_FOLDER_PATH.mkdir(exist_ok=True) + AIRFLOW_IML_FILE.write_text(iml_content) + MODULES_XML_FILE.write_text(single_module_modules_xml_template) + MISC_XML_FILE.write_text(misc_xml_template.format(SDK_NAME=sdk_name)) + IDEA_NAME_FILE.write_text(f"{project_name}\n") + + print(f"\n[green]Updated:[/] {AIRFLOW_IML_FILE}") + print(f"[green]Updated:[/] {MODULES_XML_FILE}") + print(f"[green]Updated:[/] {MISC_XML_FILE}") + print(f"[green]Updated:[/] {IDEA_NAME_FILE}") + + +def setup_idea_multi_module(sdk_name: str, project_name: str, breeze_sdk_name: str, modules: list[str]): + """Set up multiple IntelliJ modules -- one per distribution/package.""" + module_entries: list[str] = [] + created_iml_files: list[Path] = [] + + for module in modules: + module_name = get_module_name(module) + source_folders: list[str] = [] + + if (ROOT_AIRFLOW_FOLDER_PATH / module / "src").exists(): + source_folders.append('<sourceFolder url="file://$MODULE_DIR$/src" isTestSource="false" />') + if (ROOT_AIRFLOW_FOLDER_PATH / module / "tests").exists(): + source_folders.append('<sourceFolder url="file://$MODULE_DIR$/tests" isTestSource="true" />') + if module == "dev": + source_folders.append('<sourceFolder url="file://$MODULE_DIR$" isTestSource="false" />') + + if not source_folders: + continue + + print(f"[green]Adding[/] module: [blue]{module_name}[/]") + + source_lines = "\n".join(f" {line}" for line in source_folders) + module_sdk = 
breeze_sdk_name if module == "dev/breeze" else "" + iml_content = _build_sub_module_iml(source_lines, sdk_name=module_sdk) + + iml_path = ROOT_AIRFLOW_FOLDER_PATH / module / f"{module_name}.iml" + iml_path.write_text(iml_content) + created_iml_files.append(iml_path) + + relative_iml_path = f"{module}/{module_name}.iml" + module_entries.append(multi_module_entry_template.format(iml_path=relative_iml_path)) + + # Root module with excludes only + root_iml_content = _build_root_iml(sdk_name) + + IDEA_FOLDER_PATH.mkdir(exist_ok=True) + AIRFLOW_IML_FILE.write_text(root_iml_content) + + modules_xml_content = multi_module_modules_xml_template.format( + MODULE_ENTRIES="\n ".join(module_entries), + ) + MODULES_XML_FILE.write_text(modules_xml_content) + MISC_XML_FILE.write_text(misc_xml_template.format(SDK_NAME=sdk_name)) + IDEA_NAME_FILE.write_text(f"{project_name}\n") + + print(f"\n[green]Updated:[/] {AIRFLOW_IML_FILE} (root module)") + print(f"[green]Updated:[/] {MODULES_XML_FILE}") + print(f"[green]Updated:[/] {MISC_XML_FILE}") + print(f"[green]Updated:[/] {IDEA_NAME_FILE}") + print(f"[green]Created:[/] {len(created_iml_files)} sub-module .iml files") + + +# --------------------------------------------------------------------------- +# CLI +# --------------------------------------------------------------------------- + + +def _build_parser() -> argparse.ArgumentParser: + parser = argparse.ArgumentParser( + description="Set up PyCharm/IntelliJ IDEA project configuration for Airflow." + ) + module_group = parser.add_mutually_exclusive_group() + module_group.add_argument( + "--multi-module", + action="store_true", + default=None, + help="Create separate IntelliJ modules for each distribution/package " + "instead of a single module with multiple source roots. 
" + "This is the default when IntelliJ IDEA is detected (multi-module " + "is not supported by PyCharm).", + ) + module_group.add_argument( + "--single-module", + action="store_true", + default=None, + help="Create a single IntelliJ module with all source roots. " + "This is the default when only PyCharm is detected or when no " + "IDE can be detected. Use this to override auto-detection.", + ) + parser.add_argument( + "--confirm", + action="store_true", + help="Skip the confirmation prompt asking whether PyCharm/IntelliJ IDEA " + "has been closed. Useful for non-interactive or scripted runs.", + ) + parser.add_argument( + "--no-kill", + action="store_true", + help="Do not attempt to detect and kill running PyCharm/IntelliJ IDEA " + "processes. By default the script looks for running IDE processes, " + "asks for confirmation, sends SIGTERM and falls back to SIGKILL if " + "they don't exit within 5 seconds.", + ) + parser.add_argument( + "--idea-path", + metavar="PATH", + help="Path to the JetBrains configuration directory to update instead of " + "auto-detecting all installed IDEs. Can point to the base JetBrains " + "directory (e.g. '~/Library/Application Support/JetBrains') or a " + "specific product directory (e.g. '.../JetBrains/IntelliJIdea2025.1').", + ) + + # --- Python version --- + parser.add_argument( + "--python", + metavar="VERSION", + help="Python minor version to use for the venv, e.g. '3.12'. " + "Passed as --python to 'uv sync'. Must be compatible with the " + "project's requires-python. By default uv picks the version.", + ) + + # --- Module exclusion --- + parser.add_argument( + "--exclude", + action="append", + default=[], + metavar="MODULE_OR_GROUP", + help="Exclude a module or module group from the project. Can be " + "specified multiple times. A module is a path relative to the " + "project root (e.g. 'providers/amazon', 'dev/breeze'). " + f"Recognised groups: {', '.join(sorted(MODULE_GROUPS))} " + "(e.g. 
'--exclude providers' excludes all providers).", + ) + return parser + + +def _resolve_excludes(raw: list[str]) -> set[str]: + """Expand group names and return a set of module paths / prefixes to exclude.""" + result: set[str] = set() + for item in raw: + if item in MODULE_GROUPS: + prefix = MODULE_GROUPS[item] + # Groups that map to a directory prefix get a trailing "/" so + # the discover_modules prefix matching works; exact module + # names (like "dev") are kept as-is. + result.add(prefix if prefix.endswith("/") else prefix) + else: + result.add(item) + return result + + +def _validate_python_version(version: str) -> None: + """Validate that *version* is supported by the project's ``requires-python``.""" + supported = get_supported_python_versions(ROOT_AIRFLOW_FOLDER_PATH / "pyproject.toml") + if version not in supported: + print( + f"[red]Error:[/] Python {version} is not compatible with the project's " + f"requires-python constraint.\n" + f"Supported versions: [bold]{', '.join(supported)}[/]" + ) + sys.exit(1) + + +def main(): + parser = _build_parser() + args = parser.parse_args() + + # --- Validate --python early --- + python_version: str = args.python or "" + if python_version: + _validate_python_version(python_version) + + # --- Resolve --idea-path --- + idea_path: Path | None = Path(args.idea_path).expanduser() if args.idea_path else None + + # --- Resolve multi-module mode --- + if args.multi_module: + multi_module = True + elif args.single_module: + multi_module = False + else: + # Auto-detect based on installed IDE(s). 
def main():
    """
    Command-line entry point: regenerate the IDEA/PyCharm project files.

    Steps, in order: parse and validate options, choose single- or multi-module
    layout, deal with a possibly running IDE, run ``uv sync`` for the project
    root and ``dev/breeze``, discover modules, ask before overwriting, clean up
    the previous setup, write the new configuration and register both SDKs.
    Returns early, writing nothing, if the user declines either prompt.
    """
    opts = _build_parser().parse_args()

    # --- An explicit --python is validated before anything else happens ---
    requested_python: str = opts.python or ""
    if requested_python:
        _validate_python_version(requested_python)

    # --- Optional JetBrains configuration directory ---
    idea_path: Path | None = None
    if opts.idea_path:
        idea_path = Path(opts.idea_path).expanduser()

    # --- Layout: an explicit flag wins, otherwise follow the detected IDE ---
    if opts.multi_module or opts.single_module:
        multi_module = bool(opts.multi_module)
    else:
        has_intellij, has_pycharm = _detect_installed_ides(idea_path)
        multi_module = bool(has_intellij)
        if has_intellij:
            print("[cyan]Detected IntelliJ IDEA installation — defaulting to multi-module mode.[/]")
        elif has_pycharm:
            print(
                "[cyan]Detected PyCharm installation — "
                "defaulting to single-module mode "
                "(multi-module is not supported by PyCharm).[/]"
            )
        else:
            print("[cyan]No JetBrains IDE detected — defaulting to single-module mode.[/]")
        print("[dim]Use --multi-module or --single-module to override.[/]\n")

    # --- Supported versions are always listed, whether or not --python was given ---
    supported = get_supported_python_versions(ROOT_AIRFLOW_FOLDER_PATH / "pyproject.toml")
    print(f"[cyan]Supported Python versions:[/] {', '.join(supported)}")

    # --- Running IDE: kill it, or (with --no-kill) ask the user unless --confirm ---
    if opts.no_kill:
        if not opts.confirm:
            print(
                "\n[yellow]Warning:[/] PyCharm/IntelliJ IDEA must be closed before running this script, "
                "otherwise the IDE may overwrite the changes on exit.\n"
            )
            if not Confirm.ask("Have you closed PyCharm/IntelliJ IDEA?"):
                print("[yellow]Please close PyCharm/IntelliJ IDEA and run this script again.[/]\n")
                return
    elif _find_jetbrains_pids():
        _kill_jetbrains_ides()
    else:
        print("[green]No running IntelliJ IDEA / PyCharm processes detected — safe to proceed.[/]\n")

    # --- uv sync for the project root, then for dev/breeze ---
    run_uv_sync(ROOT_AIRFLOW_FOLDER_PATH, "project root", python_version=requested_python)
    sdk_name = get_sdk_name(ROOT_AIRFLOW_FOLDER_PATH)
    project_name = f"[airflow]:{ROOT_AIRFLOW_FOLDER_PATH.name}"
    print(f"[cyan]Detected Python SDK:[/] [bold]{sdk_name}[/]")

    run_uv_sync(BREEZE_PATH, "dev/breeze", python_version=requested_python)
    breeze_sdk_name = get_sdk_name(BREEZE_PATH, label=f"{ROOT_AIRFLOW_FOLDER_PATH.name}:breeze")
    print(f"[cyan]Detected Breeze SDK:[/] [bold]{breeze_sdk_name}[/]")

    # --- Module discovery ---
    modules = discover_modules(exclude_modules=_resolve_excludes(opts.exclude))

    mode_label = "multi-module" if multi_module else "single-module"
    print(f"[cyan]Mode:[/] [bold]{mode_label}[/]")
    print(f"[cyan]Modules:[/] [bold]{len(modules)}[/]\n")

    # --- Tell the user what is about to be written and deleted ---
    managed_files = [AIRFLOW_IML_FILE, MODULES_XML_FILE, MISC_XML_FILE, IDEA_NAME_FILE]
    print("[yellow]Warning!!![/] This script will update the PyCharm/IntelliJ IDEA configuration files:\n")
    for managed in managed_files:
        print(f"* {managed}")
    if multi_module:
        print("* <module>/<module>.iml for each discovered module\n")
    else:
        print()

    stale_imls = _find_previous_iml_files()
    stale_managed = [managed for managed in managed_files if managed.exists()]
    if stale_imls or stale_managed:
        total = len(stale_imls) + len(stale_managed)
        print(
            f"[yellow]Note:[/] {total} file(s) from a previous setup will also be [bold]deleted[/] "
            "before writing the new configuration:"
        )
        if stale_managed:
            print(f" [dim]·[/] {len(stale_managed)} managed file(s) in .idea/")
        if stale_imls:
            print(f" [dim]·[/] {len(stale_imls)} sub-module .iml file(s)")
        print()

    if not Confirm.ask("Overwrite the files?"):
        print("[yellow]Skipped\n")
        return

    # --- Remove the old setup, then write the new one ---
    print()
    cleanup_previous_setup()
    if multi_module:
        setup_idea_multi_module(sdk_name, project_name, breeze_sdk_name, modules)
    else:
        setup_idea_single_module(sdk_name, project_name, modules)

    # --- Register SDKs in global JetBrains configuration ---
    print()
    register_sdk(sdk_name, ROOT_AIRFLOW_FOLDER_PATH, ROOT_AIRFLOW_FOLDER_PATH, idea_path=idea_path)
    register_sdk(breeze_sdk_name, BREEZE_PATH, BREEZE_PATH, idea_path=idea_path)

    print("\n[green]Success[/]\n")
    print("[yellow]Important:[/] Restart PyCharm/IntelliJ IDEA to pick up the new configuration.\n")


if __name__ == "__main__":
    main()
