From 838a94b4202cb3561d820f909927c4415050ed1f Mon Sep 17 00:00:00 2001
From: atalman
Date: Tue, 18 Nov 2025 15:01:49 -0800
Subject: [PATCH 1/8] Change dependency update function to operate on
 index.html only rather than copying whls over
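
The dependency updater no longer mirrors wheels into s3://pytorch.
Instead it fetches the upstream simple index (pypi.org, or
pypi.nvidia.com for nvidia-* packages) and republishes its index.html
with every relative link rewritten to an absolute URL. A minimal
standalone sketch of that rewrite, using a made-up wheel filename
(not part of the patch):

    import re

    base_url = "https://pypi.nvidia.com/nvidia-cublas-cu12/"
    html = '<a href="nvidia_cublas_cu12-0.0.0-py3-none-any.whl">wheel</a>'

    def replace_href(match):
        url = match.group(1)
        # Absolute links pass through untouched
        if url.startswith(("http://", "https://", "//")):
            return match.group(0)
        # Relative links get the upstream index URL prepended
        return f'href="{base_url}{url.lstrip("./")}"'

    print(re.sub(r'href="([^"]+)"', replace_href, html))
    # href now points at https://pypi.nvidia.com/nvidia-cublas-cu12/...

A client resolving the package through download.pytorch.org is thus
sent to the upstream file instead of an S3 copy.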
---
s3_management/update_dependencies.py | 188 ++++++++++++++++-----------
1 file changed, 110 insertions(+), 78 deletions(-)
diff --git a/s3_management/update_dependencies.py b/s3_management/update_dependencies.py
index 904b2d428d..3e922efc01 100644
--- a/s3_management/update_dependencies.py
+++ b/s3_management/update_dependencies.py
@@ -544,6 +544,19 @@
}
+def is_nvidia_package(pkg_name: str) -> bool:
+ """Check if a package is from NVIDIA and should use pypi.nvidia.com"""
+ return pkg_name.startswith("nvidia-")
+
+
+def get_package_source_url(pkg_name: str) -> str:
+ """Get the source URL for a package based on its type"""
+ if is_nvidia_package(pkg_name):
+ return f"https://pypi.nvidia.com/{pkg_name}/"
+ else:
+ return f"https://pypi.org/simple/{pkg_name}/"
+
+
def download(url: str) -> bytes:
from urllib.request import urlopen
@@ -551,101 +564,123 @@ def download(url: str) -> bytes:
return conn.read()
-def is_stable(package_version: str) -> bool:
- return bool(re.match(r"^([0-9]+\.)+[0-9]+$", package_version))
+def replace_relative_links_with_absolute(html: str, base_url: str) -> str:
+ """
+ Replace all relative links in HTML with absolute links.
+
+ Args:
+ html: HTML content as string
+ base_url: Base URL to prepend to relative links
+
+ Returns:
+ Modified HTML with absolute links
+ """
+ # Ensure base_url ends with /
+ if not base_url.endswith('/'):
+ base_url += '/'
+
+ # Pattern to match href attributes with relative URLs (not starting with http:// or https://)
+ def replace_href(match):
+ full_match = match.group(0)
+ url = match.group(1)
+
+ # If URL is already absolute, don't modify it
+ if url.startswith('http://') or url.startswith('https://') or url.startswith('//'):
+ return full_match
+
+ # Remove leading ./ or /
+ url = url.lstrip('./')
+ url = url.lstrip('/')
+
+ # Replace with absolute URL
+ return f'href="{base_url}{url}"'
+
+ # Replace href="..." patterns
+ html = re.sub(r'href="([^"]+)"', replace_href, html)
+ return html
-def parse_simple_idx(url: str) -> Dict[str, str]:
- html = download(url).decode("ascii")
- return {
+
+def parse_simple_idx(url: str) -> Tuple[Dict[str, str], str]:
+ """
+ Parse a simple package index and return package dict and raw HTML.
+
+ Returns:
+ Tuple of (package_dict, raw_html)
+ """
+ html = download(url).decode("utf-8", errors="ignore")
+ packages = {
name: url
        for (url, name) in re.findall('<a href="([^"]*)"[^>]*>([^>]+)</a>', html)
}
+ return packages, html
-def get_whl_versions(idx: Dict[str, str]) -> List[str]:
- return [
- k.split("-")[1]
- for k in idx.keys()
- if k.endswith(".whl") and is_stable(k.split("-")[1])
- ]
+def upload_index_html(
+ pkg_name: str,
+ prefix: str,
+ html: str,
+ base_url: str,
+ *,
+ dry_run: bool = False,
+) -> None:
+ """Upload modified index.html to S3 with absolute links"""
+ # Replace relative links with absolute links
+ modified_html = replace_relative_links_with_absolute(html, base_url)
+ index_key = f"{prefix}/{pkg_name}/index.html"
-def get_wheels_of_version(idx: Dict[str, str], version: str) -> Dict[str, str]:
- return {
- k: v
- for (k, v) in idx.items()
- if k.endswith(".whl") and k.split("-")[1] == version
- }
+ if dry_run:
+ print(f"Dry Run - not uploading index.html to s3://pytorch/{index_key}")
+ return
+
+ print(f"Uploading index.html to s3://pytorch/{index_key}")
+ BUCKET.Object(key=index_key).put(
+ ACL="public-read",
+ ContentType="text/html",
+ Body=modified_html.encode("utf-8")
+ )
-def upload_missing_whls(
- pkg_name: str = "numpy",
- prefix: str = "whl/test",
+def upload_package_using_simple_index(
+ pkg_name: str,
+ prefix: str,
*,
dry_run: bool = False,
- only_pypi: bool = False,
- target_version: str = "latest",
) -> None:
- pypi_idx = parse_simple_idx(f"https://pypi.org/simple/{pkg_name}")
- pypi_versions = get_whl_versions(pypi_idx)
-
- # Determine which version to use
- if target_version == "latest" or not target_version:
- selected_version = pypi_versions[-1] if pypi_versions else None
- elif target_version in pypi_versions:
- selected_version = target_version
- else:
- print(
- f"Warning: Version {target_version} not found for {pkg_name}, using latest"
- )
- selected_version = pypi_versions[-1] if pypi_versions else None
+ """
+ Upload package index.html from PyPI Simple Index.
+ Simply copies the index.html with absolute links - no wheel uploads or version filtering.
+ Works for both NVIDIA and non-NVIDIA packages.
+ """
+ source_url = get_package_source_url(pkg_name)
+ is_nvidia = is_nvidia_package(pkg_name)
+
+ print(f"Processing {pkg_name} using {'NVIDIA' if is_nvidia else 'PyPI'} Simple Index: {source_url}")
- if not selected_version:
- print(f"No stable versions found for {pkg_name}")
+ # Parse the index and get raw HTML
+ try:
+ _, raw_html = parse_simple_idx(source_url)
+ except Exception as e:
+ print(f"Error fetching package {pkg_name}: {e}")
return
- pypi_latest_packages = get_wheels_of_version(pypi_idx, selected_version)
-
- download_latest_packages: Dict[str, str] = {}
- if not only_pypi:
- download_idx = parse_simple_idx(
- f"https://download.pytorch.org/{prefix}/{pkg_name}"
- )
- download_latest_packages = get_wheels_of_version(download_idx, selected_version)
-
- has_updates = False
- for pkg in pypi_latest_packages:
- if pkg in download_latest_packages:
- continue
- # Skip pp packages
- if "-pp3" in pkg:
- continue
- # Skip win32 packages
- if "-win32" in pkg:
- continue
- # Skip muslinux packages
- if "-musllinux" in pkg:
- continue
- print(f"Downloading {pkg}")
- if dry_run:
- has_updates = True
- print(f"Dry Run - not Uploading {pkg} to s3://pytorch/{prefix}/")
- continue
- data = download(pypi_idx[pkg])
- print(f"Uploading {pkg} to s3://pytorch/{prefix}/")
- BUCKET.Object(key=f"{prefix}/{pkg}").put(
- ACL="public-read", ContentType="binary/octet-stream", Body=data
- )
- has_updates = True
- if not has_updates:
- print(f"{pkg_name} is already at version {selected_version} for {prefix}")
+ # Upload modified index.html with absolute links
+ upload_index_html(
+ pkg_name,
+ prefix,
+ raw_html,
+ source_url,
+ dry_run=dry_run
+ )
+
+ print(f"Successfully processed index.html for {pkg_name}")
def main() -> None:
from argparse import ArgumentParser
- parser = ArgumentParser("Upload dependent packages to s3://pytorch")
+ parser = ArgumentParser("Upload dependent package indexes to s3://pytorch")
# Get unique paths from the packages list
project_paths = list(
{
@@ -657,7 +692,6 @@ def main() -> None:
project_paths += ["all"]
parser.add_argument("--package", choices=project_paths, default="torch")
parser.add_argument("--dry-run", action="store_true")
- parser.add_argument("--only-pypi", action="store_true")
parser.add_argument("--include-stable", action="store_true")
args = parser.parse_args()
@@ -682,12 +716,10 @@ def main() -> None:
else:
full_path = f"{prefix}"
- upload_missing_whls(
+ upload_package_using_simple_index(
pkg_name,
full_path,
- dry_run=args.dry_run,
- only_pypi=args.only_pypi,
- target_version=pkg_config["version"],
+ dry_run=args.dry_run
)
From 6040f5613ce5f83c7fc6cfb7c63728c27f81dc53 Mon Sep 17 00:00:00 2001
From: atalman
Date: Tue, 18 Nov 2025 15:13:36 -0800
Subject: [PATCH 2/8] fix
---
s3_management/update_dependencies.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/s3_management/update_dependencies.py b/s3_management/update_dependencies.py
index 3e922efc01..cf7ca9b76d 100644
--- a/s3_management/update_dependencies.py
+++ b/s3_management/update_dependencies.py
@@ -655,7 +655,7 @@ def upload_package_using_simple_index(
"""
source_url = get_package_source_url(pkg_name)
is_nvidia = is_nvidia_package(pkg_name)
-
+
print(f"Processing {pkg_name} using {'NVIDIA' if is_nvidia else 'PyPI'} Simple Index: {source_url}")
# Parse the index and get raw HTML
@@ -673,7 +673,7 @@ def upload_package_using_simple_index(
source_url,
dry_run=dry_run
)
-
+
print(f"Successfully processed index.html for {pkg_name}")
From 201967fef6f8dd4100b080218fa5d6409177cb78 Mon Sep 17 00:00:00 2001
From: atalman
Date: Tue, 18 Nov 2025 15:43:43 -0800
Subject: [PATCH 3/8] lint
---
s3_management/update_dependencies.py | 34 ++++++++++++----------------
1 file changed, 15 insertions(+), 19 deletions(-)
diff --git a/s3_management/update_dependencies.py b/s3_management/update_dependencies.py
index cf7ca9b76d..4268731e7c 100644
--- a/s3_management/update_dependencies.py
+++ b/s3_management/update_dependencies.py
@@ -576,8 +576,8 @@ def replace_relative_links_with_absolute(html: str, base_url: str) -> str:
Modified HTML with absolute links
"""
# Ensure base_url ends with /
- if not base_url.endswith('/'):
- base_url += '/'
+ if not base_url.endswith("/"):
+ base_url += "/"
# Pattern to match href attributes with relative URLs (not starting with http:// or https://)
def replace_href(match):
@@ -585,12 +585,16 @@ def replace_href(match):
url = match.group(1)
# If URL is already absolute, don't modify it
- if url.startswith('http://') or url.startswith('https://') or url.startswith('//'):
+ if (
+ url.startswith("http://")
+ or url.startswith("https://")
+ or url.startswith("//")
+ ):
return full_match
# Remove leading ./ or /
- url = url.lstrip('./')
- url = url.lstrip('/')
+ url = url.lstrip("./")
+ url = url.lstrip("/")
# Replace with absolute URL
return f'href="{base_url}{url}"'
@@ -636,9 +640,7 @@ def upload_index_html(
print(f"Uploading index.html to s3://pytorch/{index_key}")
BUCKET.Object(key=index_key).put(
- ACL="public-read",
- ContentType="text/html",
- Body=modified_html.encode("utf-8")
+ ACL="public-read", ContentType="text/html", Body=modified_html.encode("utf-8")
)
@@ -656,7 +658,9 @@ def upload_package_using_simple_index(
source_url = get_package_source_url(pkg_name)
is_nvidia = is_nvidia_package(pkg_name)
- print(f"Processing {pkg_name} using {'NVIDIA' if is_nvidia else 'PyPI'} Simple Index: {source_url}")
+ print(
+ f"Processing {pkg_name} using {'NVIDIA' if is_nvidia else 'PyPI'} Simple Index: {source_url}"
+ )
# Parse the index and get raw HTML
try:
@@ -666,13 +670,7 @@ def upload_package_using_simple_index(
return
# Upload modified index.html with absolute links
- upload_index_html(
- pkg_name,
- prefix,
- raw_html,
- source_url,
- dry_run=dry_run
- )
+ upload_index_html(pkg_name, prefix, raw_html, source_url, dry_run=dry_run)
print(f"Successfully processed index.html for {pkg_name}")
@@ -717,9 +715,7 @@ def main() -> None:
full_path = f"{prefix}"
upload_package_using_simple_index(
- pkg_name,
- full_path,
- dry_run=args.dry_run
+ pkg_name, full_path, dry_run=args.dry_run
)
From 188069634cf7f2fa4013530e1d9959d4967019fd Mon Sep 17 00:00:00 2001
From: atalman
Date: Wed, 19 Nov 2025 14:51:48 -0800
Subject: [PATCH 4/8] more fixes
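
manage.py now also lists packages that exist only as a republished
index.html (no wheels in the bucket) when generating the per-subdir
package index. A standalone sketch of the key matching, with made-up
keys (not part of the patch):

    prefix_to_search = "whl/test/"
    keys = [
        "whl/test/nvidia-cublas-cu12/index.html",  # index-only: included
        "whl/test/torch/index.html",               # has wheels: skipped
        "whl/test/torch-0.0.0-py3-none-any.whl",   # not pkg/index.html
    ]
    packages_from_wheels = {"torch"}
    for key in keys:
        relative_key = key[len(prefix_to_search):]
        parts = relative_key.split("/")
        if len(parts) == 2 and parts[1] == "index.html":
            package_name = parts[0].replace("-", "_")
            if package_name.lower() not in {
                p.lower() for p in packages_from_wheels
            }:
                print(f"index-only package: {package_name}")
    # prints: index-only package: nvidia_cublas_cu12

The workflow's push trigger is dropped so the sync only runs via
workflow_dispatch, and the fetch_metadata future-creation
comprehension is unrolled into an explicit loop so each head_object
request can be logged as it is submitted.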
---
.github/workflows/update-s3-dependencies.yml | 5 --
s3_management/manage.py | 58 +++++++++++++++-----
s3_management/update_dependencies.py | 2 +-
3 files changed, 44 insertions(+), 21 deletions(-)
diff --git a/.github/workflows/update-s3-dependencies.yml b/.github/workflows/update-s3-dependencies.yml
index 5a728b1357..7d297aed43 100644
--- a/.github/workflows/update-s3-dependencies.yml
+++ b/.github/workflows/update-s3-dependencies.yml
@@ -1,11 +1,6 @@
name: Update S3 HTML dependencies for download.pytorch.org nightly and test
on:
- push:
- branches:
- - main
- paths:
- - s3_management/update_dependencies.py
workflow_dispatch:
inputs:
dryrun:
diff --git a/s3_management/manage.py b/s3_management/manage.py
index 71de731d69..77aabbc228 100755
--- a/s3_management/manage.py
+++ b/s3_management/manage.py
@@ -132,6 +132,7 @@
"charset_normalizer",
"cmake",
"colorama",
+ "cuda_bindings",
"fbgemm_gpu",
"fbgemm_gpu_genai",
"filelock",
@@ -218,7 +219,6 @@
"torchvision_extra_decoders",
"triton",
"tqdm",
- "typing_extensions",
"typing_inspect",
"urllib3",
"xformers",
@@ -544,7 +544,7 @@ def to_simple_package_html(self, subdir: Optional[str], package_name: str) -> st
attributes += ' data-requires-python=">=3.10"'
out.append(
-        f'    <a href="/{obj.key}"{attributes}>{path.basename(obj.key).replace("%2B","+")}</a><br/>'
+        f'    <a href="/{obj.key}"{attributes}>{path.basename(obj.key).replace("%2B", "+")}</a><br/>'
)
# Adding html footer
out.append("
")
- for pkg_name in sorted(self.get_package_names(subdir)):
+
+ # Get packages from wheel files
+ packages_from_wheels = set(self.get_package_names(subdir))
+
+ # Also find packages that have index.html but no wheels
+ packages_with_index_only = set()
+ resolved_subdir = self._resolve_subdir(subdir)
+
+ # List all objects in the subdir to find packagename/index.html patterns
+ prefix_to_search = f"{resolved_subdir}/"
+ for obj in BUCKET.objects.filter(Prefix=prefix_to_search):
+ # Check if this is a packagename/index.html file
+ relative_key = obj.key[len(prefix_to_search):]
+ parts = relative_key.split("/")
+ if len(parts) == 2 and parts[1] == "index.html":
+ package_name = parts[0].replace("-", "_")
+ # Convert back to the format used in wheel names (use _ not -)
+ # But we need to check if this package already has wheels
+ if package_name.lower() not in {p.lower() for p in packages_from_wheels}:
+ packages_with_index_only.add(package_name)
+ print(f"INFO: Including package '{package_name}' (has index.html but no wheels)")
+
+ # Combine both sets of packages
+ all_packages = packages_from_wheels | packages_with_index_only
+
+ for pkg_name in sorted(all_packages):
out.append(
-            f'    <a href="{pkg_name.replace("_","-")}/">{pkg_name.replace("_","-")}</a><br/>'
+            f'    <a href="{pkg_name.replace("_", "-")}/">{pkg_name.replace("_", "-")}</a><br/>'
)
# Adding html footer
out.append(" ")
@@ -691,16 +716,19 @@ def fetch_metadata(self) -> None:
# Add PEP 503-compatible hashes to URLs to allow clients to avoid spurious downloads, if possible.
regex_multipart_upload = r"^[A-Za-z0-9+/=]+=-[0-9]+$"
with concurrent.futures.ThreadPoolExecutor(max_workers=6) as executor:
- for idx, future in {
- idx: executor.submit(
- lambda key: CLIENT.head_object(
- Bucket=BUCKET.name, Key=key, ChecksumMode="Enabled"
- ),
- obj.orig_key,
- )
- for (idx, obj) in enumerate(self.objects)
- if obj.size is None
- }.items():
+ futures = {}
+ for idx, obj in enumerate(self.objects):
+ if obj.size is None:
+ print(f"Fetching metadata for: {obj.orig_key}")
+ future = executor.submit(
+ lambda key: CLIENT.head_object(
+ Bucket=BUCKET.name, Key=key, ChecksumMode="Enabled"
+ ),
+ obj.orig_key,
+ )
+ futures[idx] = future
+
+ for idx, future in futures.items():
response = future.result()
raw = response.get("ChecksumSHA256")
if raw and match(regex_multipart_upload, raw):
@@ -813,7 +841,7 @@ def main() -> None:
)
etime = time.time()
print(
- f"DEBUG: Fetched {len(idx.objects)} objects for '{prefix}' in {etime-stime:.2f} seconds"
+ f"DEBUG: Fetched {len(idx.objects)} objects for '{prefix}' in {etime - stime:.2f} seconds"
)
if args.compute_sha256:
idx.compute_sha256()
diff --git a/s3_management/update_dependencies.py b/s3_management/update_dependencies.py
index 4268731e7c..9edc58ab45 100644
--- a/s3_management/update_dependencies.py
+++ b/s3_management/update_dependencies.py
@@ -605,7 +605,7 @@ def replace_href(match):
return html
-def parse_simple_idx(url: str) -> Tuple[Dict[str, str], str]:
+def parse_simple_idx(url: str) -> tuple[Dict[str, str], str]:
"""
Parse a simple package index and return package dict and raw HTML.
From 93e96f957bebfa01365fbed40d15fb748f3faafd Mon Sep 17 00:00:00 2001
From: atalman
Date: Wed, 19 Nov 2025 17:31:32 -0800
Subject: [PATCH 5/8] more_fixes
---
s3_management/manage.py | 15 +++++++++++----
1 file changed, 11 insertions(+), 4 deletions(-)
diff --git a/s3_management/manage.py b/s3_management/manage.py
index 77aabbc228..aeb8e0f5a6 100755
--- a/s3_management/manage.py
+++ b/s3_management/manage.py
@@ -574,15 +574,19 @@ def to_simple_packages_html(
prefix_to_search = f"{resolved_subdir}/"
for obj in BUCKET.objects.filter(Prefix=prefix_to_search):
# Check if this is a packagename/index.html file
- relative_key = obj.key[len(prefix_to_search):]
+ relative_key = obj.key[len(prefix_to_search) :]
parts = relative_key.split("/")
if len(parts) == 2 and parts[1] == "index.html":
package_name = parts[0].replace("-", "_")
# Convert back to the format used in wheel names (use _ not -)
# But we need to check if this package already has wheels
- if package_name.lower() not in {p.lower() for p in packages_from_wheels}:
+ if package_name.lower() not in {
+ p.lower() for p in packages_from_wheels
+ }:
packages_with_index_only.add(package_name)
- print(f"INFO: Including package '{package_name}' (has index.html but no wheels)")
+ print(
+ f"INFO: Including package '{package_name}' in {prefix_to_search} (has index.html but no wheels)"
+ )
# Combine both sets of packages
all_packages = packages_from_wheels | packages_with_index_only
@@ -612,8 +616,11 @@ def upload_libtorch_html(self) -> None:
def upload_pep503_htmls(self) -> None:
for subdir in self.subdirs:
index_html = self.to_simple_packages_html(subdir=subdir)
+
for bucket in INDEX_BUCKETS:
print(f"INFO Uploading {subdir}/index.html to {bucket.name}")
+ print(f"{index_html}")
+
bucket.Object(key=f"{subdir}/index.html").put(
ACL="public-read",
CacheControl="no-cache,no-store,must-revalidate",
@@ -727,7 +734,7 @@ def fetch_metadata(self) -> None:
obj.orig_key,
)
futures[idx] = future
-
+
for idx, future in futures.items():
response = future.result()
raw = response.get("ChecksumSHA256")
From a9ca0fa4ae6e7f64210c05da48df035351dcc064 Mon Sep 17 00:00:00 2001
From: atalman
Date: Wed, 19 Nov 2025 17:34:01 -0800
Subject: [PATCH 6/8] more
---
s3_management/manage.py | 1 +
1 file changed, 1 insertion(+)
diff --git a/s3_management/manage.py b/s3_management/manage.py
index aeb8e0f5a6..e4679aeb24 100755
--- a/s3_management/manage.py
+++ b/s3_management/manage.py
@@ -219,6 +219,7 @@
"torchvision_extra_decoders",
"triton",
"tqdm",
+ "typing_extensions",
"typing_inspect",
"urllib3",
"xformers",
From 323ce775c2f2c37895c3e2bfdcf2952c2372ed8a Mon Sep 17 00:00:00 2001
From: atalman
Date: Wed, 19 Nov 2025 17:37:25 -0800
Subject: [PATCH 7/8] fix
---
s3_management/manage.py | 2 --
1 file changed, 2 deletions(-)
diff --git a/s3_management/manage.py b/s3_management/manage.py
index e4679aeb24..f63caa97df 100755
--- a/s3_management/manage.py
+++ b/s3_management/manage.py
@@ -620,8 +620,6 @@ def upload_pep503_htmls(self) -> None:
for bucket in INDEX_BUCKETS:
print(f"INFO Uploading {subdir}/index.html to {bucket.name}")
- print(f"{index_html}")
-
bucket.Object(key=f"{subdir}/index.html").put(
ACL="public-read",
CacheControl="no-cache,no-store,must-revalidate",
From 7829ad8ed7b10bbc9d336256faa66bba995d89d1 Mon Sep 17 00:00:00 2001
From: atalman
Date: Wed, 19 Nov 2025 17:42:31 -0800
Subject: [PATCH 8/8] test
---
s3_management/manage.py | 1 -
1 file changed, 1 deletion(-)
diff --git a/s3_management/manage.py b/s3_management/manage.py
index f63caa97df..9ded3453f3 100755
--- a/s3_management/manage.py
+++ b/s3_management/manage.py
@@ -725,7 +725,6 @@ def fetch_metadata(self) -> None:
futures = {}
for idx, obj in enumerate(self.objects):
if obj.size is None:
- print(f"Fetching metadata for: {obj.orig_key}")
future = executor.submit(
lambda key: CLIENT.head_object(
Bucket=BUCKET.name, Key=key, ChecksumMode="Enabled"