From 838a94b4202cb3561d820f909927c4415050ed1f Mon Sep 17 00:00:00 2001
From: atalman
Date: Tue, 18 Nov 2025 15:01:49 -0800
Subject: [PATCH 1/8] Change dependency update function to operate on
 index.html only rather than copying whls over

---
 s3_management/update_dependencies.py | 188 ++++++++++++++++-----------
 1 file changed, 110 insertions(+), 78 deletions(-)

diff --git a/s3_management/update_dependencies.py b/s3_management/update_dependencies.py
index 904b2d428d..3e922efc01 100644
--- a/s3_management/update_dependencies.py
+++ b/s3_management/update_dependencies.py
@@ -544,6 +544,19 @@
 }
 
 
+def is_nvidia_package(pkg_name: str) -> bool:
+    """Check if a package is from NVIDIA and should use pypi.nvidia.com"""
+    return pkg_name.startswith("nvidia-")
+
+
+def get_package_source_url(pkg_name: str) -> str:
+    """Get the source URL for a package based on its type"""
+    if is_nvidia_package(pkg_name):
+        return f"https://pypi.nvidia.com/{pkg_name}/"
+    else:
+        return f"https://pypi.org/simple/{pkg_name}/"
+
+
 def download(url: str) -> bytes:
     from urllib.request import urlopen
 
@@ -551,101 +564,123 @@ def download(url: str) -> bytes:
     return conn.read()
 
 
-def is_stable(package_version: str) -> bool:
-    return bool(re.match(r"^([0-9]+\.)+[0-9]+$", package_version))
+def replace_relative_links_with_absolute(html: str, base_url: str) -> str:
+    """
+    Replace all relative links in HTML with absolute links.
+
+    Args:
+        html: HTML content as string
+        base_url: Base URL to prepend to relative links
+
+    Returns:
+        Modified HTML with absolute links
+    """
+    # Ensure base_url ends with /
+    if not base_url.endswith('/'):
+        base_url += '/'
+
+    # Pattern to match href attributes with relative URLs (not starting with http:// or https://)
+    def replace_href(match):
+        full_match = match.group(0)
+        url = match.group(1)
+
+        # If URL is already absolute, don't modify it
+        if url.startswith('http://') or url.startswith('https://') or url.startswith('//'):
+            return full_match
+
+        # Remove leading ./ or /
+        url = url.lstrip('./')
+        url = url.lstrip('/')
+
+        # Replace with absolute URL
+        return f'href="{base_url}{url}"'
+
+    # Replace href="..." patterns
+    html = re.sub(r'href="([^"]+)"', replace_href, html)
+    return html
 
 
-def parse_simple_idx(url: str) -> Dict[str, str]:
-    html = download(url).decode("ascii")
-    return {
+
+def parse_simple_idx(url: str) -> Tuple[Dict[str, str], str]:
+    """
+    Parse a simple package index and return package dict and raw HTML.
+
+    Returns:
+        Tuple of (package_dict, raw_html)
+    """
+    html = download(url).decode("utf-8", errors="ignore")
+    packages = {
         name: url
         for (url, name) in re.findall('<a href="([^"]+)"[^>]*>([^>]+)</a>', html)
     }
+    return packages, html
 
 
-def get_whl_versions(idx: Dict[str, str]) -> List[str]:
-    return [
-        k.split("-")[1]
-        for k in idx.keys()
-        if k.endswith(".whl") and is_stable(k.split("-")[1])
-    ]
+def upload_index_html(
+    pkg_name: str,
+    prefix: str,
+    html: str,
+    base_url: str,
+    *,
+    dry_run: bool = False,
+) -> None:
+    """Upload modified index.html to S3 with absolute links"""
+    # Replace relative links with absolute links
+    modified_html = replace_relative_links_with_absolute(html, base_url)
 
+    index_key = f"{prefix}/{pkg_name}/index.html"
 
-def get_wheels_of_version(idx: Dict[str, str], version: str) -> Dict[str, str]:
-    return {
-        k: v
-        for (k, v) in idx.items()
-        if k.endswith(".whl") and k.split("-")[1] == version
-    }
+    if dry_run:
+        print(f"Dry Run - not uploading index.html to s3://pytorch/{index_key}")
+        return
+
+    print(f"Uploading index.html to s3://pytorch/{index_key}")
+    BUCKET.Object(key=index_key).put(
+        ACL="public-read",
+        ContentType="text/html",
+        Body=modified_html.encode("utf-8")
+    )
 
 
-def upload_missing_whls(
-    pkg_name: str = "numpy",
-    prefix: str = "whl/test",
+def upload_package_using_simple_index(
+    pkg_name: str,
+    prefix: str,
     *,
     dry_run: bool = False,
-    only_pypi: bool = False,
-    target_version: str = "latest",
 ) -> None:
-    pypi_idx = parse_simple_idx(f"https://pypi.org/simple/{pkg_name}")
-    pypi_versions = get_whl_versions(pypi_idx)
-
-    # Determine which version to use
-    if target_version == "latest" or not target_version:
-        selected_version = pypi_versions[-1] if pypi_versions else None
-    elif target_version in pypi_versions:
-        selected_version = target_version
-    else:
-        print(
-            f"Warning: Version {target_version} not found for {pkg_name}, using latest"
-        )
-        selected_version = pypi_versions[-1] if pypi_versions else None
+    """
+    Upload package index.html from PyPI Simple Index.
+    Simply copies the index.html with absolute links - no wheel uploads or version filtering.
+    Works for both NVIDIA and non-NVIDIA packages.
+    """
+    source_url = get_package_source_url(pkg_name)
+    is_nvidia = is_nvidia_package(pkg_name)
+
+    print(f"Processing {pkg_name} using {'NVIDIA' if is_nvidia else 'PyPI'} Simple Index: {source_url}")
 
-    if not selected_version:
-        print(f"No stable versions found for {pkg_name}")
+    # Parse the index and get raw HTML
+    try:
+        _, raw_html = parse_simple_idx(source_url)
+    except Exception as e:
+        print(f"Error fetching package {pkg_name}: {e}")
         return
 
-    pypi_latest_packages = get_wheels_of_version(pypi_idx, selected_version)
-
-    download_latest_packages: Dict[str, str] = {}
-    if not only_pypi:
-        download_idx = parse_simple_idx(
-            f"https://download.pytorch.org/{prefix}/{pkg_name}"
-        )
-        download_latest_packages = get_wheels_of_version(download_idx, selected_version)
-
-    has_updates = False
-    for pkg in pypi_latest_packages:
-        if pkg in download_latest_packages:
-            continue
-        # Skip pp packages
-        if "-pp3" in pkg:
-            continue
-        # Skip win32 packages
-        if "-win32" in pkg:
-            continue
-        # Skip muslinux packages
-        if "-musllinux" in pkg:
-            continue
-        print(f"Downloading {pkg}")
-        if dry_run:
-            has_updates = True
-            print(f"Dry Run - not Uploading {pkg} to s3://pytorch/{prefix}/")
-            continue
-        data = download(pypi_idx[pkg])
-        print(f"Uploading {pkg} to s3://pytorch/{prefix}/")
-        BUCKET.Object(key=f"{prefix}/{pkg}").put(
-            ACL="public-read", ContentType="binary/octet-stream", Body=data
-        )
-        has_updates = True
-    if not has_updates:
-        print(f"{pkg_name} is already at version {selected_version} for {prefix}")
+    # Upload modified index.html with absolute links
+    upload_index_html(
+        pkg_name,
+        prefix,
+        raw_html,
+        source_url,
+        dry_run=dry_run
+    )
+
+    print(f"Successfully processed index.html for {pkg_name}")
 
 
 def main() -> None:
     from argparse import ArgumentParser
 
-    parser = ArgumentParser("Upload dependent packages to s3://pytorch")
+    parser = ArgumentParser("Upload dependent package indexes to s3://pytorch")
     # Get unique paths from the packages list
     project_paths = list(
         {
@@ -657,7 +692,6 @@ def main() -> None:
     project_paths += ["all"]
     parser.add_argument("--package", choices=project_paths, default="torch")
     parser.add_argument("--dry-run", action="store_true")
-    parser.add_argument("--only-pypi", action="store_true")
     parser.add_argument("--include-stable", action="store_true")
 
     args = parser.parse_args()
@@ -682,12 +716,10 @@ def main() -> None:
             else:
                 full_path = f"{prefix}"
 
-            upload_missing_whls(
+            upload_package_using_simple_index(
                 pkg_name,
                 full_path,
-                dry_run=args.dry_run,
-                only_pypi=args.only_pypi,
-                target_version=pkg_config["version"],
+                dry_run=args.dry_run
             )
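Note for reviewers: the href-rewriting step above can be exercised without S3 access. Below is a minimal, self-contained sketch; absolutize_hrefs is an invented name that condenses replace_relative_links_with_absolute(), and the wheel filenames are made up for illustration.

    import re

    def absolutize_hrefs(html: str, base_url: str) -> str:
        # Condensed restatement of replace_relative_links_with_absolute()
        if not base_url.endswith("/"):
            base_url += "/"

        def repl(m):
            url = m.group(1)
            # Already-absolute links pass through untouched
            if url.startswith(("http://", "https://", "//")):
                return m.group(0)
            # Strip a leading "./" or "/", then pin to the source index
            return f'href="{base_url}{url.lstrip("./")}"'

        return re.sub(r'href="([^"]+)"', repl, html)

    sample = (
        '<a href="nvidia_nccl_cu12-2.21.5-py3-none-manylinux2014_x86_64.whl">w</a>\n'
        '<a href="https://example.com/other.whl">o</a>'
    )
    print(absolutize_hrefs(sample, "https://pypi.nvidia.com/nvidia-nccl-cu12"))
    # The first href gains the pypi.nvidia.com base URL; the second is unchanged.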
+ """ + source_url = get_package_source_url(pkg_name) + is_nvidia = is_nvidia_package(pkg_name) + + print(f"Processing {pkg_name} using {'NVIDIA' if is_nvidia else 'PyPI'} Simple Index: {source_url}") - if not selected_version: - print(f"No stable versions found for {pkg_name}") + # Parse the index and get raw HTML + try: + _, raw_html = parse_simple_idx(source_url) + except Exception as e: + print(f"Error fetching package {pkg_name}: {e}") return - pypi_latest_packages = get_wheels_of_version(pypi_idx, selected_version) - - download_latest_packages: Dict[str, str] = {} - if not only_pypi: - download_idx = parse_simple_idx( - f"https://download.pytorch.org/{prefix}/{pkg_name}" - ) - download_latest_packages = get_wheels_of_version(download_idx, selected_version) - - has_updates = False - for pkg in pypi_latest_packages: - if pkg in download_latest_packages: - continue - # Skip pp packages - if "-pp3" in pkg: - continue - # Skip win32 packages - if "-win32" in pkg: - continue - # Skip muslinux packages - if "-musllinux" in pkg: - continue - print(f"Downloading {pkg}") - if dry_run: - has_updates = True - print(f"Dry Run - not Uploading {pkg} to s3://pytorch/{prefix}/") - continue - data = download(pypi_idx[pkg]) - print(f"Uploading {pkg} to s3://pytorch/{prefix}/") - BUCKET.Object(key=f"{prefix}/{pkg}").put( - ACL="public-read", ContentType="binary/octet-stream", Body=data - ) - has_updates = True - if not has_updates: - print(f"{pkg_name} is already at version {selected_version} for {prefix}") + # Upload modified index.html with absolute links + upload_index_html( + pkg_name, + prefix, + raw_html, + source_url, + dry_run=dry_run + ) + + print(f"Successfully processed index.html for {pkg_name}") def main() -> None: from argparse import ArgumentParser - parser = ArgumentParser("Upload dependent packages to s3://pytorch") + parser = ArgumentParser("Upload dependent package indexes to s3://pytorch") # Get unique paths from the packages list project_paths = list( { @@ -657,7 +692,6 @@ def main() -> None: project_paths += ["all"] parser.add_argument("--package", choices=project_paths, default="torch") parser.add_argument("--dry-run", action="store_true") - parser.add_argument("--only-pypi", action="store_true") parser.add_argument("--include-stable", action="store_true") args = parser.parse_args() @@ -682,12 +716,10 @@ def main() -> None: else: full_path = f"{prefix}" - upload_missing_whls( + upload_package_using_simple_index( pkg_name, full_path, - dry_run=args.dry_run, - only_pypi=args.only_pypi, - target_version=pkg_config["version"], + dry_run=args.dry_run ) From 6040f5613ce5f83c7fc6cfb7c63728c27f81dc53 Mon Sep 17 00:00:00 2001 From: atalman Date: Tue, 18 Nov 2025 15:13:36 -0800 Subject: [PATCH 2/8] fix --- s3_management/update_dependencies.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/s3_management/update_dependencies.py b/s3_management/update_dependencies.py index 3e922efc01..cf7ca9b76d 100644 --- a/s3_management/update_dependencies.py +++ b/s3_management/update_dependencies.py @@ -655,7 +655,7 @@ def upload_package_using_simple_index( """ source_url = get_package_source_url(pkg_name) is_nvidia = is_nvidia_package(pkg_name) - + print(f"Processing {pkg_name} using {'NVIDIA' if is_nvidia else 'PyPI'} Simple Index: {source_url}") # Parse the index and get raw HTML @@ -673,7 +673,7 @@ def upload_package_using_simple_index( source_url, dry_run=dry_run ) - + print(f"Successfully processed index.html for {pkg_name}") From 201967fef6f8dd4100b080218fa5d6409177cb78 Mon Sep 
From 201967fef6f8dd4100b080218fa5d6409177cb78 Mon Sep 17 00:00:00 2001
From: atalman
Date: Tue, 18 Nov 2025 15:43:43 -0800
Subject: [PATCH 3/8] lint

---
 s3_management/update_dependencies.py | 34 ++++++++++++----------------
 1 file changed, 15 insertions(+), 19 deletions(-)

diff --git a/s3_management/update_dependencies.py b/s3_management/update_dependencies.py
index cf7ca9b76d..4268731e7c 100644
--- a/s3_management/update_dependencies.py
+++ b/s3_management/update_dependencies.py
@@ -576,8 +576,8 @@ def replace_relative_links_with_absolute(html: str, base_url: str) -> str:
         Modified HTML with absolute links
     """
     # Ensure base_url ends with /
-    if not base_url.endswith('/'):
-        base_url += '/'
+    if not base_url.endswith("/"):
+        base_url += "/"
 
     # Pattern to match href attributes with relative URLs (not starting with http:// or https://)
     def replace_href(match):
@@ -585,12 +585,16 @@ def replace_href(match):
         url = match.group(1)
 
         # If URL is already absolute, don't modify it
-        if url.startswith('http://') or url.startswith('https://') or url.startswith('//'):
+        if (
+            url.startswith("http://")
+            or url.startswith("https://")
+            or url.startswith("//")
+        ):
             return full_match
 
         # Remove leading ./ or /
-        url = url.lstrip('./')
-        url = url.lstrip('/')
+        url = url.lstrip("./")
+        url = url.lstrip("/")
 
         # Replace with absolute URL
         return f'href="{base_url}{url}"'
@@ -636,9 +640,7 @@ def upload_index_html(
 
     print(f"Uploading index.html to s3://pytorch/{index_key}")
     BUCKET.Object(key=index_key).put(
-        ACL="public-read",
-        ContentType="text/html",
-        Body=modified_html.encode("utf-8")
+        ACL="public-read", ContentType="text/html", Body=modified_html.encode("utf-8")
     )
 
 
@@ -656,7 +658,9 @@ def upload_package_using_simple_index(
     source_url = get_package_source_url(pkg_name)
     is_nvidia = is_nvidia_package(pkg_name)
 
-    print(f"Processing {pkg_name} using {'NVIDIA' if is_nvidia else 'PyPI'} Simple Index: {source_url}")
+    print(
+        f"Processing {pkg_name} using {'NVIDIA' if is_nvidia else 'PyPI'} Simple Index: {source_url}"
+    )
 
     # Parse the index and get raw HTML
     try:
@@ -666,13 +670,7 @@ def upload_package_using_simple_index(
         return
 
     # Upload modified index.html with absolute links
-    upload_index_html(
-        pkg_name,
-        prefix,
-        raw_html,
-        source_url,
-        dry_run=dry_run
-    )
+    upload_index_html(pkg_name, prefix, raw_html, source_url, dry_run=dry_run)
 
     print(f"Successfully processed index.html for {pkg_name}")
 
@@ -717,9 +715,7 @@ def main() -> None:
                 full_path = f"{prefix}"
 
             upload_package_using_simple_index(
-                pkg_name,
-                full_path,
-                dry_run=args.dry_run
+                pkg_name, full_path, dry_run=args.dry_run
             )
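The net effect of the first three patches: s3://pytorch/<prefix>/<pkg>/index.html becomes a copy of the upstream simple index in which every relative link is pinned to the source host, so download.pytorch.org serves only HTML and never mirrors the wheels themselves. Sketching with an invented filename, an upstream entry such as

    <a href="nvidia_nccl_cu12-2.21.5-py3-none-manylinux2014_x86_64.whl">nvidia_nccl_cu12-2.21.5-py3-none-manylinux2014_x86_64.whl</a>

is stored as

    <a href="https://pypi.nvidia.com/nvidia-nccl-cu12/nvidia_nccl_cu12-2.21.5-py3-none-manylinux2014_x86_64.whl">nvidia_nccl_cu12-2.21.5-py3-none-manylinux2014_x86_64.whl</a>

so a pip client that resolves a dependency through download.pytorch.org fetches the wheel directly from pypi.nvidia.com (or pypi.org for non-NVIDIA packages).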
"charset_normalizer", "cmake", "colorama", + "cuda_bindings", "fbgemm_gpu", "fbgemm_gpu_genai", "filelock", @@ -218,7 +219,6 @@ "torchvision_extra_decoders", "triton", "tqdm", - "typing_extensions", "typing_inspect", "urllib3", "xformers", @@ -544,7 +544,7 @@ def to_simple_package_html(self, subdir: Optional[str], package_name: str) -> st attributes += ' data-requires-python=">=3.10"' out.append( - f' {path.basename(obj.key).replace("%2B","+")}
' + f' {path.basename(obj.key).replace("%2B", "+")}
' ) # Adding html footer out.append(" ") @@ -562,9 +562,34 @@ def to_simple_packages_html( out.append("") out.append("") out.append(" ") - for pkg_name in sorted(self.get_package_names(subdir)): + + # Get packages from wheel files + packages_from_wheels = set(self.get_package_names(subdir)) + + # Also find packages that have index.html but no wheels + packages_with_index_only = set() + resolved_subdir = self._resolve_subdir(subdir) + + # List all objects in the subdir to find packagename/index.html patterns + prefix_to_search = f"{resolved_subdir}/" + for obj in BUCKET.objects.filter(Prefix=prefix_to_search): + # Check if this is a packagename/index.html file + relative_key = obj.key[len(prefix_to_search):] + parts = relative_key.split("/") + if len(parts) == 2 and parts[1] == "index.html": + package_name = parts[0].replace("-", "_") + # Convert back to the format used in wheel names (use _ not -) + # But we need to check if this package already has wheels + if package_name.lower() not in {p.lower() for p in packages_from_wheels}: + packages_with_index_only.add(package_name) + print(f"INFO: Including package '{package_name}' (has index.html but no wheels)") + + # Combine both sets of packages + all_packages = packages_from_wheels | packages_with_index_only + + for pkg_name in sorted(all_packages): out.append( - f' {pkg_name.replace("_","-")}
' + f' {pkg_name.replace("_", "-")}
' ) # Adding html footer out.append(" ") @@ -691,16 +716,19 @@ def fetch_metadata(self) -> None: # Add PEP 503-compatible hashes to URLs to allow clients to avoid spurious downloads, if possible. regex_multipart_upload = r"^[A-Za-z0-9+/=]+=-[0-9]+$" with concurrent.futures.ThreadPoolExecutor(max_workers=6) as executor: - for idx, future in { - idx: executor.submit( - lambda key: CLIENT.head_object( - Bucket=BUCKET.name, Key=key, ChecksumMode="Enabled" - ), - obj.orig_key, - ) - for (idx, obj) in enumerate(self.objects) - if obj.size is None - }.items(): + futures = {} + for idx, obj in enumerate(self.objects): + if obj.size is None: + print(f"Fetching metadata for: {obj.orig_key}") + future = executor.submit( + lambda key: CLIENT.head_object( + Bucket=BUCKET.name, Key=key, ChecksumMode="Enabled" + ), + obj.orig_key, + ) + futures[idx] = future + + for idx, future in futures.items(): response = future.result() raw = response.get("ChecksumSHA256") if raw and match(regex_multipart_upload, raw): @@ -813,7 +841,7 @@ def main() -> None: ) etime = time.time() print( - f"DEBUG: Fetched {len(idx.objects)} objects for '{prefix}' in {etime-stime:.2f} seconds" + f"DEBUG: Fetched {len(idx.objects)} objects for '{prefix}' in {etime - stime:.2f} seconds" ) if args.compute_sha256: idx.compute_sha256() diff --git a/s3_management/update_dependencies.py b/s3_management/update_dependencies.py index 4268731e7c..9edc58ab45 100644 --- a/s3_management/update_dependencies.py +++ b/s3_management/update_dependencies.py @@ -605,7 +605,7 @@ def replace_href(match): return html -def parse_simple_idx(url: str) -> Tuple[Dict[str, str], str]: +def parse_simple_idx(url: str) -> tuple[Dict[str, str], str]: """ Parse a simple package index and return package dict and raw HTML. From 93e96f957bebfa01365fbed40d15fb748f3faafd Mon Sep 17 00:00:00 2001 From: atalman Date: Wed, 19 Nov 2025 17:31:32 -0800 Subject: [PATCH 5/8] more_fixes --- s3_management/manage.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/s3_management/manage.py b/s3_management/manage.py index 77aabbc228..aeb8e0f5a6 100755 --- a/s3_management/manage.py +++ b/s3_management/manage.py @@ -574,15 +574,19 @@ def to_simple_packages_html( prefix_to_search = f"{resolved_subdir}/" for obj in BUCKET.objects.filter(Prefix=prefix_to_search): # Check if this is a packagename/index.html file - relative_key = obj.key[len(prefix_to_search):] + relative_key = obj.key[len(prefix_to_search) :] parts = relative_key.split("/") if len(parts) == 2 and parts[1] == "index.html": package_name = parts[0].replace("-", "_") # Convert back to the format used in wheel names (use _ not -) # But we need to check if this package already has wheels - if package_name.lower() not in {p.lower() for p in packages_from_wheels}: + if package_name.lower() not in { + p.lower() for p in packages_from_wheels + }: packages_with_index_only.add(package_name) - print(f"INFO: Including package '{package_name}' (has index.html but no wheels)") + print( + f"INFO: Including package '{package_name}' in {prefix_to_search} (has index.html but no wheels)" + ) # Combine both sets of packages all_packages = packages_from_wheels | packages_with_index_only @@ -612,8 +616,11 @@ def upload_libtorch_html(self) -> None: def upload_pep503_htmls(self) -> None: for subdir in self.subdirs: index_html = self.to_simple_packages_html(subdir=subdir) + for bucket in INDEX_BUCKETS: print(f"INFO Uploading {subdir}/index.html to {bucket.name}") + print(f"{index_html}") + 
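The manage.py change above means the top-level index now lists a package if either wheels exist for it or a <package>/index.html object sits directly under the prefix. A minimal sketch of the key-matching rule, runnable without boto3; the helper name and S3 keys are invented for illustration:

    def index_only_packages(keys, prefix):
        found = set()
        for key in keys:
            if not key.startswith(prefix + "/"):
                continue
            parts = key[len(prefix) + 1 :].split("/")
            # Exactly "<package>/index.html" one level below the prefix qualifies
            if len(parts) == 2 and parts[1] == "index.html":
                found.add(parts[0].replace("-", "_"))  # normalize to wheel-style names
        return found

    keys = [
        "whl/test/nvidia-nccl-cu12/index.html",                      # index-only
        "whl/test/torch/torch-2.6.0-cp312-cp312-linux_x86_64.whl",   # a wheel, ignored here
        "whl/test/a/b/index.html",                                   # too deep, skipped
    ]
    print(index_only_packages(keys, "whl/test"))
    # -> {'nvidia_nccl_cu12'}; manage.py additionally drops names that already
    #    have wheels (packages_from_wheels) before emitting the final list.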
From 93e96f957bebfa01365fbed40d15fb748f3faafd Mon Sep 17 00:00:00 2001
From: atalman
Date: Wed, 19 Nov 2025 17:31:32 -0800
Subject: [PATCH 5/8] more_fixes

---
 s3_management/manage.py | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/s3_management/manage.py b/s3_management/manage.py
index 77aabbc228..aeb8e0f5a6 100755
--- a/s3_management/manage.py
+++ b/s3_management/manage.py
@@ -574,15 +574,19 @@ def to_simple_packages_html(
         prefix_to_search = f"{resolved_subdir}/"
         for obj in BUCKET.objects.filter(Prefix=prefix_to_search):
             # Check if this is a packagename/index.html file
-            relative_key = obj.key[len(prefix_to_search):]
+            relative_key = obj.key[len(prefix_to_search) :]
             parts = relative_key.split("/")
             if len(parts) == 2 and parts[1] == "index.html":
                 package_name = parts[0].replace("-", "_")
                 # Convert back to the format used in wheel names (use _ not -)
                 # But we need to check if this package already has wheels
-                if package_name.lower() not in {p.lower() for p in packages_from_wheels}:
+                if package_name.lower() not in {
+                    p.lower() for p in packages_from_wheels
+                }:
                     packages_with_index_only.add(package_name)
-                    print(f"INFO: Including package '{package_name}' (has index.html but no wheels)")
+                    print(
+                        f"INFO: Including package '{package_name}' in {prefix_to_search} (has index.html but no wheels)"
+                    )
 
         # Combine both sets of packages
         all_packages = packages_from_wheels | packages_with_index_only
@@ -612,8 +616,11 @@ def upload_libtorch_html(self) -> None:
     def upload_pep503_htmls(self) -> None:
         for subdir in self.subdirs:
             index_html = self.to_simple_packages_html(subdir=subdir)
+
             for bucket in INDEX_BUCKETS:
                 print(f"INFO Uploading {subdir}/index.html to {bucket.name}")
+                print(f"{index_html}")
+
                 bucket.Object(key=f"{subdir}/index.html").put(
                     ACL="public-read",
                     CacheControl="no-cache,no-store,must-revalidate",
@@ -727,7 +734,7 @@ def fetch_metadata(self) -> None:
                     obj.orig_key,
                 )
                 futures[idx] = future
-            
+
             for idx, future in futures.items():
                 response = future.result()
                 raw = response.get("ChecksumSHA256")

From a9ca0fa4ae6e7f64210c05da48df035351dcc064 Mon Sep 17 00:00:00 2001
From: atalman
Date: Wed, 19 Nov 2025 17:34:01 -0800
Subject: [PATCH 6/8] more

---
 s3_management/manage.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/s3_management/manage.py b/s3_management/manage.py
index aeb8e0f5a6..e4679aeb24 100755
--- a/s3_management/manage.py
+++ b/s3_management/manage.py
@@ -219,6 +219,7 @@
     "torchvision_extra_decoders",
     "triton",
     "tqdm",
+    "typing_extensions",
     "typing_inspect",
     "urllib3",
     "xformers",

From 323ce775c2f2c37895c3e2bfdcf2952c2372ed8a Mon Sep 17 00:00:00 2001
From: atalman
Date: Wed, 19 Nov 2025 17:37:25 -0800
Subject: [PATCH 7/8] fix

---
 s3_management/manage.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/s3_management/manage.py b/s3_management/manage.py
index e4679aeb24..f63caa97df 100755
--- a/s3_management/manage.py
+++ b/s3_management/manage.py
@@ -620,8 +620,6 @@ def upload_pep503_htmls(self) -> None:
 
             for bucket in INDEX_BUCKETS:
                 print(f"INFO Uploading {subdir}/index.html to {bucket.name}")
-                print(f"{index_html}")
-
                 bucket.Object(key=f"{subdir}/index.html").put(
                     ACL="public-read",
                     CacheControl="no-cache,no-store,must-revalidate",

From 7829ad8ed7b10bbc9d336256faa66bba995d89d1 Mon Sep 17 00:00:00 2001
From: atalman
Date: Wed, 19 Nov 2025 17:42:31 -0800
Subject: [PATCH 8/8] test

---
 s3_management/manage.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/s3_management/manage.py b/s3_management/manage.py
index f63caa97df..9ded3453f3 100755
--- a/s3_management/manage.py
+++ b/s3_management/manage.py
@@ -725,7 +725,6 @@ def fetch_metadata(self) -> None:
             futures = {}
             for idx, obj in enumerate(self.objects):
                 if obj.size is None:
-                    print(f"Fetching metadata for: {obj.orig_key}")
                     future = executor.submit(
                         lambda key: CLIENT.head_object(
                             Bucket=BUCKET.name, Key=key, ChecksumMode="Enabled"