WillAyd commented on code in PR #49259: URL: https://github.com/apache/arrow/pull/49259#discussion_r2841742290
########## python/_build_backend/__init__.py: ########## @@ -0,0 +1,79 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +""" +Build backend wrapper that resolves license symlinks before delegating +to scikit-build-core. + +Arrow's LICENSE.txt and NOTICE.txt live at the repository root, one level +above python/. They are symlinked into python/ so that license-files in +pyproject.toml can reference them otherwise project metadata fails validation. +This is done before any build backend is invoked that's why symlinks are necessary. +But when building sdist tarballs symlinks are not copied and we end up with +broken LICENSE.txt and NOTICE.txt. + +This custom build backend only replace the symlinks with hardlinks +before scikit_build_core.build.build_sdist so +that sdist contains the actual file content. The symlinks are restored +afterwards so the git working tree stays clean. 
+""" + +from contextlib import contextmanager +import os +from pathlib import Path +import shutil +import sys + +from scikit_build_core.build import * # noqa: F401,F403 +from scikit_build_core.build import build_sdist as scikit_build_sdist + +LICENSE_FILES = ("LICENSE.txt", "NOTICE.txt") +PYTHON_DIR = Path(__file__).resolve().parent.parent + + +@contextmanager +def prepare_licenses(): + # Temporarily replace symlinks with hardlinks so sdist gets real content. + # On Windows we just copy the files since hardlinks might not be supported. + for name in LICENSE_FILES: + parent_license = PYTHON_DIR.parent / name + pyarrow_license = PYTHON_DIR / name + if sys.platform == "win32": + # For Windows copy the files. + pyarrow_license.unlink(missing_ok=True) + shutil.copy2(parent_license, pyarrow_license) + else: + # For Unix-like systems we replace the symlink with + # a hardlink to avoid copying the file content. + if pyarrow_license.is_symlink(): + target = pyarrow_license.resolve() + pyarrow_license.unlink() + os.link(target, pyarrow_license) + try: + yield + finally: + if sys.platform != "win32": Review Comment: Yeah, I agree with your overall sentiment - sadly it's a lot of effort to maintain these two files :-) I think the meson-python approach was pretty good, where a dist script copied these into the source distribution at the time it was being made. I'm wondering if scikit-build-core has a similar hook. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
