From 903e9dc4f8db6174da7bd83eea99a233ebc9e116 Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius <240938+aignas@users.noreply.github.com> Date: Thu, 16 Apr 2026 19:06:50 +0900 Subject: [PATCH] fix(pypi): skip index lookups when all package overrides are specified When index_url_overrides is provided for all packages, we no longer need to call the index at all. This improves performance and aligns with the expected behavior where overrides should be sufficient. Fixes #3709 --- CHANGELOG.md | 3 +- python/private/pypi/simpleapi_download.bzl | 23 +++++----- .../simpleapi_download_tests.bzl | 45 +++++++++++++++++++ 3 files changed, 59 insertions(+), 12 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 89b8356f00..6b3194c847 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -81,7 +81,8 @@ Other changes: we will from now on fetch the lists of available packages on each index. The used package mappings will be written as facts to the `MODULE.bazel.lock` file on supported bazel versions and it should be done at most once. As a result, - per-package {obj}`experimental_index_url_overrides` is no longer needed . What + per-package {obj}`experimental_index_url_overrides` is no longer needed, but + if specified, it needs to be provided for all packages not on the default index. What is more, the flags for `--index_url` and `--extra-index-url` now behave in the same way as in `uv` or `pip`, i.e. we default to `--index-url` if the package is not found in `--extra-index-url`. Fixes diff --git a/python/private/pypi/simpleapi_download.bzl b/python/private/pypi/simpleapi_download.bzl index 97215d753d..63044bc14a 100644 --- a/python/private/pypi/simpleapi_download.bzl +++ b/python/private/pypi/simpleapi_download.bzl @@ -43,8 +43,7 @@ def simpleapi_download( attr: Contains the parameters for the download. They are grouped into a struct for better clarity. It must have attributes: * index_url: str, the index, or if `extra_index_urls` are passed, the default index. - * index_url_overrides: dict[str, str], the index overrides for - separate packages. + * index_url_overrides: dict[str, str], the index overrides for separate packages. * extra_index_urls: Will be looked at in the order they are defined and the first match wins. This is similar to what uv does, see https://docs.astral.sh/uv/concepts/indexes/#searching-across-multiple-indexes. @@ -132,16 +131,22 @@ def simpleapi_download( return contents def _get_dist_urls(ctx, *, default_index, index_urls, index_url_overrides, sources, read_simpleapi, attr, block, _fail = fail, **kwargs): + if index_url_overrides: + # Let's not call the index at all and just assume that all of the overrides have been + # specified. + return { + pkg: _normalize_url("{}/{}/".format( + index_url_overrides.get(pkg, default_index), + pkg.replace("_", "-"), # Use the official normalization for URLs + )) + for pkg in sources + } + downloads = {} results = {} # Ensure the value is not frozen index_urls = [] + (index_urls or []) - for extra in index_url_overrides.values(): - if extra not in index_urls: - index_urls.append(extra) - - index_urls = index_urls or [] if default_index not in index_urls: index_urls.append(default_index) @@ -174,10 +179,6 @@ def _get_dist_urls(ctx, *, default_index, index_urls, index_url_overrides, sourc # and in the outer function process merging of the results. continue - if index_url_overrides.get(pkg, index_url) != index_url: - # we should not use this index for the package - continue - found = result.output.get(pkg) if not found: continue diff --git a/tests/pypi/simpleapi_download/simpleapi_download_tests.bzl b/tests/pypi/simpleapi_download/simpleapi_download_tests.bzl index 55439c2593..c515c19ae0 100644 --- a/tests/pypi/simpleapi_download/simpleapi_download_tests.bzl +++ b/tests/pypi/simpleapi_download/simpleapi_download_tests.bzl @@ -273,6 +273,51 @@ def _test_download_url_parallel(env): _tests.append(_test_download_url_parallel) +def _test_download_url_parallel_with_overrides(env): + downloads = {} + reads = [ + "", + "", + "", + ] + + def download(url, output, **kwargs): + _ = kwargs # buildifier: disable=unused-variable + downloads[url[0]] = output + return struct(wait = lambda: struct(success = True)) + + simpleapi_download( + ctx = struct( + getenv = {}.get, + download = download, + report_progress = lambda _: None, + # We will first add a download to the list, so this is a poor man's `next(foo)` + # implementation. We use 2 because we will enqueue 2 downloads in parallel. + read = lambda i: reads[len(downloads) - 2], + path = lambda i: "path/for/" + i, + ), + attr = struct( + index_url_overrides = { + "bar": "https://example.com/extra/simple/", + }, + index_url = "https://example.com/default/simple/", + extra_index_urls = [], + sources = {"bar": None, "baz": None, "foo": None}, + envsubst = [], + ), + cache = pypi_cache(), + parallel_download = True, + get_auth = lambda ctx, urls, ctx_attr: struct(), + ) + + env.expect.that_dict(downloads).contains_exactly({ + "https://example.com/default/simple/baz/": "path/for/https___example_com_default_simple_baz.html", + "https://example.com/default/simple/foo/": "path/for/https___example_com_default_simple_foo.html", + "https://example.com/extra/simple/bar/": "path/for/https___example_com_extra_simple_bar.html", + }) + +_tests.append(_test_download_url_parallel_with_overrides) + def _test_download_envsubst_url(env): downloads = {} reads = [